{
"cells": [
{
"cell_type": "markdown",
"id": "8cfb37b5",
"metadata": {},
"source": [
"# 4omini query"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "2a8e89d4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
" warnings.warn(\n"
]
}
],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from evoagentx.benchmark import PubMedQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54fa1aa5",
"metadata": {},
"outputs": [],
"source": [
"class PubMedQASplits(PubMedQA):\n",
"\n",
" def _load_data(self):\n",
" # load the original test data \n",
" super()._load_data()\n",
" # split the data into train, dev and test\n",
" import numpy as np \n",
" np.random.seed(42)\n",
" permutation = np.random.permutation(len(self._dev_data))\n",
" full_test_data = self._dev_data \n",
" # randomly select 10 samples for train, 40 for dev, and 100 for test\n",
" self._train_data = [full_test_data[idx] for idx in permutation[:50]]\n",
" self._dev_data = [full_test_data[idx] for idx in permutation[:50]]\n",
" self._test_data =self._test_data[0:500]\n",
" self._fulldata = full_test_data\n",
"\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" context_list = []\n",
" paragraphs = example[\"context\"][\"contexts\"]\n",
" context = \"\\n\".join(paragraphs)\n",
" problem = \"Context: {}\\n\\nQuestion: {}\\n\\nAnswer:\".format(context, example[\"question\"])\n",
" return {\"problem\": problem}\n",
"\n",
"\n",
"hotpotqa_graph_data = {\n",
" \"goal\": \"Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"answer_generate\",\n",
" \"description\": \"Answer the question based on the context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"problem\", \"type\": \"str\", \"required\": True, \"description\": \"The problem to solve.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The answer to the problem.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1ebace55",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"\n",
"os.environ[\"AZURE_OPENAI_KEY\"] = \"\"\n",
"os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
"llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
" azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
" azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
" api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"\n",
"executor_llm = LiteLLM(config=llm_config)\n",
"optimizer_llm = LiteLLM(config=llm_config)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "20e078fa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2025-12-07 11:53:15.783\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hotpotqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_train_v1.1.json ...\u001b[0m\n",
"\u001b[32m2025-12-07 11:53:19.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hotpotqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_dev_distractor_v1.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = PubMedQASplits()\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "78d5904e",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"7405"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")"
]
},
{
"cell_type": "markdown",
"id": "5e0b4cc8",
"metadata": {},
"source": [
"# textgrad (prompt)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "d686ee20",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2025-12-07 11:53:19.708\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n",
"Evaluating workflow: 0%| | 1/500 [00:01<12:07, 1.46s/it]Task exception was never retrieved\n",
"future: exception=RuntimeError('Event loop is closed')>\n",
"Traceback (most recent call last):\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/tasks.py\", line 277, in __step\n",
" result = coro.send(None)\n",
" ^^^^^^^^^^^^^^^\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/utils.py\", line 873, in _client_async_logging_helper\n",
" GLOBAL_LOGGING_WORKER.ensure_initialized_and_enqueue(\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 322, in ensure_initialized_and_enqueue\n",
" self.enqueue(async_coroutine)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 131, in enqueue\n",
" self._queue.put_nowait(task)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 147, in put_nowait\n",
" self._wakeup_next(self._getters)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 63, in _wakeup_next\n",
" waiter.set_result(None)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 263, in set_result\n",
" self.__schedule_callbacks()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 173, in __schedule_callbacks\n",
" self._loop.call_soon(callback, self, context=ctx)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 762, in call_soon\n",
" self._check_closed()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 520, in _check_closed\n",
" raise RuntimeError('Event loop is closed')\n",
"RuntimeError: Event loop is closed\n",
"Evaluating workflow: 0%| | 2/500 [00:01<05:36, 1.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 8/500 [00:01<01:05, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 12/500 [00:02<00:50, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 18/500 [00:02<00:29, 16.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 21/500 [00:03<00:59, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 26/500 [00:03<00:45, 10.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 28/500 [00:03<00:46, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 32/500 [00:04<00:41, 11.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 37/500 [00:04<00:36, 12.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7272727272727273, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5833333333333334, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 39/500 [00:04<00:33, 13.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 41/500 [00:04<00:40, 11.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 43/500 [00:05<00:53, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.13333333333333333, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 46/500 [00:05<00:41, 10.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 48/500 [00:05<00:51, 8.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 51/500 [00:06<00:55, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 56/500 [00:06<00:48, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 58/500 [00:06<00:45, 9.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 60/500 [00:07<00:44, 9.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 64/500 [00:07<00:40, 10.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.08695652173913045, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 67/500 [00:07<00:53, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 71/500 [00:08<00:48, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 74/500 [00:08<00:41, 10.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 76/500 [00:08<00:38, 10.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.03333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 79/500 [00:09<00:49, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.30769230769230765, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.16666666666666669, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 81/500 [01:00<47:48, 6.85s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 87/500 [01:00<18:50, 2.74s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 89/500 [01:01<14:30, 2.12s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.125, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 93/500 [01:01<08:11, 1.21s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 97/500 [01:02<05:14, 1.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 99/500 [01:02<04:09, 1.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 101/500 [01:02<03:23, 1.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.27272727272727276, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 106/500 [01:02<01:50, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 108/500 [01:03<01:36, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 110/500 [01:03<01:48, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 112/500 [01:04<01:29, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.9090909090909091, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 116/500 [01:04<01:02, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.04081632653061225, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4615384615384615, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 121/500 [01:04<00:37, 10.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 124/500 [01:04<00:33, 11.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7368421052631579, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 126/500 [01:05<00:38, 9.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 130/500 [01:05<00:40, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7777777777777778, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 136/500 [01:06<00:45, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 146/500 [01:06<00:21, 16.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 149/500 [01:07<00:44, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.42857142857142855, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 157/500 [01:08<00:32, 10.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.21052631578947367, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 162/500 [01:08<00:26, 12.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 168/500 [01:09<00:33, 10.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.1111111111111111, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 171/500 [01:10<00:41, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 173/500 [02:00<27:24, 5.03s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 175/500 [02:01<19:41, 3.64s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 177/500 [02:01<14:02, 2.61s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 182/500 [02:01<06:13, 1.17s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.625, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 184/500 [02:01<04:39, 1.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.38095238095238093, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 186/500 [02:02<03:33, 1.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.11764705882352941, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 188/500 [02:02<03:08, 1.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.14285714285714288, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 192/500 [02:03<01:42, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 197/500 [02:03<00:56, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 199/500 [02:03<00:51, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 204/500 [02:04<00:46, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.16666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 206/500 [02:04<00:49, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 209/500 [02:05<00:37, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 215/500 [02:05<00:25, 11.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 217/500 [02:05<00:24, 11.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7777777777777778, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 219/500 [02:06<00:48, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 224/500 [02:06<00:36, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.21428571428571425, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 229/500 [02:07<00:23, 11.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6153846153846153, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.15384615384615385, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 232/500 [02:07<00:24, 11.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 234/500 [02:07<00:25, 10.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 236/500 [02:07<00:23, 11.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 238/500 [02:08<00:28, 9.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 243/500 [02:08<00:24, 10.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 248/500 [02:09<00:21, 11.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 250/500 [02:09<00:29, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 254/500 [02:09<00:27, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.15384615384615385, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 257/500 [02:10<00:25, 9.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 259/500 [02:10<00:27, 8.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 263/500 [02:10<00:25, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.16666666666666669, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 265/500 [02:11<00:31, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.13333333333333336, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 269/500 [03:01<19:09, 4.98s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5806451612903226, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 274/500 [03:01<08:46, 2.33s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 277/500 [03:01<05:37, 1.51s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 279/500 [03:01<04:11, 1.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 281/500 [03:02<03:10, 1.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 286/500 [03:03<01:45, 2.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.9090909090909091, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.23529411764705882, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 288/500 [03:03<01:22, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 294/500 [03:03<00:43, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2025-12-07 11:56:23.691\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mevoagentx.workflow.workflow\u001b[0m:\u001b[36masync_execute\u001b[0m:\u001b[36m104\u001b[0m - \u001b[31m\u001b[1mAn Error occurs when executing the workflow: Error during single_generate_async: litellm.RateLimitError: AzureException RateLimitError - Your requests to gpt-4o-mini for gpt-4o-mini in East US have exceeded the token rate limit for your current AIServices S0 pricing tier. This request was for ChatCompletions_Create under Azure OpenAI API version 2025-01-01-preview. Please retry after 1 second. To increase your default rate limit, visit: https://aka.ms/oai/quotaincrease.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 296/500 [03:03<00:35, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 298/500 [03:04<00:33, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 301/500 [03:05<00:44, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 304/500 [03:05<00:29, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 308/500 [03:05<00:22, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 310/500 [03:05<00:18, 10.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 313/500 [03:06<00:17, 10.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3571428571428571, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 317/500 [03:07<00:29, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 322/500 [03:07<00:16, 10.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 330/500 [03:07<00:11, 15.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 333/500 [03:08<00:21, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 335/500 [03:08<00:22, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 340/500 [03:09<00:16, 9.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 342/500 [03:09<00:16, 9.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 344/500 [03:09<00:18, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 346/500 [03:10<00:20, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 348/500 [03:10<00:20, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 350/500 [03:10<00:16, 9.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 352/500 [03:10<00:16, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.04444444444444445, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 354/500 [03:11<00:18, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 357/500 [03:11<00:16, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 360/500 [04:01<16:37, 7.12s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 364/500 [04:01<08:13, 3.63s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.07142857142857144, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 369/500 [04:01<03:32, 1.62s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6363636363636364, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.21052631578947367, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 371/500 [04:02<02:36, 1.21s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 374/500 [04:02<01:43, 1.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.35294117647058826, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 376/500 [04:02<01:21, 1.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 378/500 [04:03<01:05, 1.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 381/500 [04:03<00:37, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.10526315789473684, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 385/500 [04:03<00:22, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 388/500 [04:04<00:15, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.11764705882352941, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 390/500 [04:04<00:16, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 392/500 [04:05<00:24, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 396/500 [04:05<00:16, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 401/500 [04:06<00:10, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 403/500 [04:06<00:08, 10.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.35714285714285715, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 405/500 [04:06<00:15, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 407/500 [04:07<00:16, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 410/500 [04:07<00:14, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 412/500 [04:07<00:11, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 418/500 [04:08<00:07, 11.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 420/500 [04:08<00:06, 11.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 422/500 [04:08<00:07, 10.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 424/500 [04:08<00:07, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 427/500 [04:09<00:08, 8.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 431/500 [04:09<00:06, 10.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 433/500 [04:09<00:06, 10.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 435/500 [04:09<00:06, 9.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 437/500 [04:10<00:07, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 439/500 [04:10<00:09, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 443/500 [04:11<00:05, 9.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 446/500 [04:11<00:04, 12.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 448/500 [04:11<00:03, 13.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8235294117647058, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 450/500 [04:11<00:04, 10.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6153846153846153, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 452/500 [04:12<00:07, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 453/500 [05:01<06:40, 8.52s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 456/500 [05:01<03:37, 4.94s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5263157894736842, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 460/500 [05:01<01:42, 2.55s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 462/500 [05:02<01:09, 1.83s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 464/500 [05:02<00:49, 1.37s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 466/500 [05:02<00:32, 1.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.06060606060606061, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 467/500 [05:02<00:25, 1.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 470/500 [05:03<00:12, 2.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.47619047619047616, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 474/500 [05:03<00:05, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 476/500 [05:03<00:04, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Evaluating workflow: 95%|█████████▌| 477/500 [05:04<00:05, 3.99it/s]Unclosed connector\n",
"connections: ['deque([(, 9706833.339199008)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.608262513)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.58293831)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.98561138)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.144348238)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.568526758)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.190116027)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.246411916)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.970390048)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.728774056)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.971580995)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.543152127)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.332917776)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.070988659)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.820581889)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.061071675)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.660261652)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.057310568)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.320419114)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706832.92792528)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.039624516)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.41924051)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.0620211)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.30129984)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706833.969782745)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.279454239)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.86199866)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed connector\n",
"connections: ['deque([(, 9706834.566667313)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.202076651)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.574714107)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.32608822)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.86027172)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.79403196)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.198638653)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.32135346)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.276575617)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.312936094)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.385317784)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.561538259)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.83374123)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.09928204)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.475442462)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.954519052)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.543440253)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.713191785)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.011249008)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706834.854768094)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.67486628)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.152896648)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.7468186)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.870078135)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.21862246)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.747099983)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.275274444)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.94819115)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed connector\n",
"connections: ['deque([(, 9706836.904656712)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.03730312)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.460972665)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706835.972691856)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.449210517)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.10574446)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706836.37345917)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.474045642)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.115622288)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.94052132)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.0674185)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.622524388)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.301349184)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.950606762)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.991567569)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.686984628)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.088006405)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706838.171331778)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706840.300554968)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.778763738)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.9880991)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706837.726972347)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706839.455664787)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706891.81647973)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706891.618730936)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706891.844060568)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706892.169079188)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9706892.218102802)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(