{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2a8e89d4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
" warnings.warn(\n"
]
}
],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54f40417",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from evoagentx.benchmark import PertQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "54fa1aa5",
"metadata": {},
"outputs": [],
"source": [
"from evoagentx.benchmark import PertQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question\"])\n",
" return {\"problem\": problem}\n",
"\n",
"\n",
"hotpotqa_graph_data = {\n",
" \"goal\": \"Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"answer_generate\",\n",
" \"description\": \"Answer the question based on the context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"problem\", \"type\": \"str\", \"required\": True, \"description\": \"The problem to solve.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The answer to the problem.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1ebace55",
"metadata": {},
"outputs": [],
"source": [
"api_key = \"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\"\n",
"# Function to encode the image\n",
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = api_key\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
"\n",
"\n",
"llm_config = OpenAILLMConfig(model=\"gpt-4o-mini-2024-07-18\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"llm = OpenAILLM(config=llm_config)\n",
"executor_llm = OpenAILLM(config=llm_config)\n",
"optimizer_llm = OpenAILLM(config=llm_config)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "20e078fa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 00:51:11.467\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_update_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-13 00:51:11.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_update_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-13 00:51:11.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/adamson_update_test.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = PertQA(pertdata='adamson')\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "b485d4ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2160"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._dev_data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "aa7522fd",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3000"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._test_data)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3b52c62d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/3000 [00:04<3:50:03, 4.60s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 2/3000 [00:05<1:56:41, 2.34s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 4/3000 [00:05<43:31, 1.15it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 8/3000 [00:06<15:05, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 10/3000 [00:06<11:55, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 11/3000 [00:07<17:55, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 15/3000 [00:07<10:33, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 19/3000 [00:08<08:29, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 22/3000 [00:09<12:23, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 26/3000 [00:09<06:59, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 28/3000 [00:10<09:22, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 30/3000 [00:10<11:22, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 32/3000 [00:11<10:29, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 36/3000 [00:11<06:52, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 40/3000 [00:12<07:24, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 41/3000 [00:13<13:11, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 43/3000 [00:13<11:43, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 45/3000 [00:13<11:00, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 48/3000 [00:14<08:43, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 49/3000 [00:14<11:54, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 53/3000 [00:15<08:38, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 56/3000 [00:15<06:30, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 58/3000 [00:16<08:46, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 60/3000 [00:16<11:20, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 61/3000 [00:17<10:35, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 63/3000 [00:17<10:57, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 66/3000 [00:18<09:39, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 67/3000 [00:18<11:11, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 68/3000 [00:18<10:57, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 71/3000 [00:19<11:36, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▎ | 75/3000 [00:19<06:59, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 77/3000 [00:20<08:52, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 80/3000 [00:20<06:52, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 84/3000 [00:21<09:19, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 85/3000 [00:21<10:38, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 87/3000 [00:22<10:09, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 88/3000 [00:22<10:39, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 91/3000 [00:23<08:48, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 93/3000 [00:23<06:35, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 95/3000 [00:23<07:47, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 96/3000 [00:23<09:18, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 97/3000 [00:24<10:28, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 99/3000 [00:24<08:28, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 102/3000 [00:24<07:20, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 104/3000 [00:25<07:58, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:25<08:55, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 106/3000 [00:25<11:14, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:26<10:59, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 108/3000 [00:26<13:40, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 109/3000 [00:26<13:46, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:27<12:10, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 116/3000 [00:27<06:49, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 119/3000 [00:28<07:10, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:28<06:21, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 123/3000 [00:28<06:35, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:29<08:22, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 127/3000 [00:30<16:07, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 128/3000 [00:31<14:07, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 129/3000 [00:31<13:54, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 131/3000 [00:31<11:09, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:31<08:11, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 135/3000 [00:32<07:39, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 138/3000 [00:32<06:21, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 140/3000 [00:32<05:07, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 142/3000 [00:32<05:38, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 143/3000 [00:33<08:02, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 144/3000 [00:33<09:04, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 145/3000 [00:33<10:27, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 147/3000 [00:34<14:15, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 151/3000 [00:35<11:22, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:36<11:58, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 157/3000 [00:36<08:03, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 162/3000 [00:37<05:18, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 164/3000 [00:37<07:45, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 166/3000 [00:38<11:29, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 168/3000 [00:39<11:50, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 169/3000 [00:39<12:31, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 172/3000 [00:39<08:04, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 175/3000 [00:40<08:39, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 179/3000 [00:40<05:56, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 181/3000 [00:41<07:29, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 182/3000 [00:41<07:59, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 183/3000 [00:41<09:12, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:41<09:40, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 186/3000 [00:42<11:38, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 188/3000 [00:42<07:57, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 189/3000 [00:43<09:01, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 191/3000 [00:43<08:41, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 193/3000 [00:43<09:03, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 196/3000 [00:44<06:02, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 198/3000 [00:44<07:00, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 201/3000 [00:44<06:02, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 202/3000 [00:45<11:55, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 205/3000 [00:46<11:00, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 206/3000 [00:46<09:58, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 207/3000 [00:46<13:14, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 209/3000 [00:47<11:05, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 214/3000 [00:47<06:57, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 215/3000 [00:48<09:27, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 220/3000 [00:48<05:28, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 221/3000 [00:48<06:10, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 222/3000 [00:49<08:45, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 223/3000 [00:49<13:33, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 224/3000 [00:50<14:13, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 226/3000 [00:50<13:13, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 230/3000 [00:51<08:21, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 233/3000 [00:51<08:51, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 236/3000 [00:52<06:35, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 239/3000 [00:52<06:18, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 240/3000 [00:52<08:11, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 241/3000 [00:53<10:13, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 243/3000 [00:53<10:01, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 247/3000 [00:54<08:00, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 249/3000 [00:54<07:50, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 250/3000 [00:54<07:18, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 251/3000 [00:55<08:28, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 254/3000 [00:55<07:52, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 256/3000 [00:56<08:19, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 257/3000 [00:56<13:09, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 260/3000 [00:56<08:27, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 261/3000 [00:57<10:43, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 264/3000 [00:58<09:15, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 265/3000 [00:58<08:15, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 269/3000 [00:58<06:37, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 270/3000 [00:58<06:58, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 272/3000 [00:59<09:02, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 273/3000 [00:59<12:15, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 274/3000 [01:00<12:37, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 275/3000 [01:00<12:21, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [01:00<09:01, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 279/3000 [01:01<09:39, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 280/3000 [01:01<13:56, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 284/3000 [01:02<08:20, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 287/3000 [01:02<06:40, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 289/3000 [01:03<08:29, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 290/3000 [01:03<09:01, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 292/3000 [01:03<08:07, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 294/3000 [01:03<05:58, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 296/3000 [01:04<06:09, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 298/3000 [01:04<06:05, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 301/3000 [01:05<11:45, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 304/3000 [01:05<07:25, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [01:06<11:17, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [01:07<08:08, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 312/3000 [01:07<08:04, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 313/3000 [01:07<08:10, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 316/3000 [01:08<06:29, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 318/3000 [01:08<08:11, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 319/3000 [01:09<12:52, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [01:09<14:36, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 322/3000 [01:10<10:31, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 324/3000 [01:10<11:11, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 326/3000 [01:10<07:30, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 328/3000 [01:11<09:26, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 333/3000 [01:11<06:23, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 336/3000 [01:12<07:29, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 338/3000 [01:12<07:28, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [01:13<07:22, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 342/3000 [01:13<06:34, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 344/3000 [01:14<09:29, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 345/3000 [01:14<13:19, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 346/3000 [01:14<12:17, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 348/3000 [01:15<10:09, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 349/3000 [01:15<09:54, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 353/3000 [01:15<05:44, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 356/3000 [01:16<07:12, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 358/3000 [01:16<07:15, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 361/3000 [01:17<11:04, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 362/3000 [01:18<12:35, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 364/3000 [01:18<10:26, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 365/3000 [01:18<10:07, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 367/3000 [01:19<08:39, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 371/3000 [01:19<06:50, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 373/3000 [01:19<06:03, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [01:20<06:36, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 377/3000 [01:20<06:55, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 378/3000 [01:20<08:24, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 379/3000 [01:21<09:19, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 380/3000 [01:21<12:32, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [01:22<10:27, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 383/3000 [01:22<15:28, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 388/3000 [01:23<07:01, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 390/3000 [01:23<07:45, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 393/3000 [01:24<07:06, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 396/3000 [01:24<04:46, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 398/3000 [01:25<09:34, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 400/3000 [01:25<09:24, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:25<08:55, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 404/3000 [01:26<07:38, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 406/3000 [01:27<12:22, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:27<08:35, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 415/3000 [01:27<05:05, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 418/3000 [01:28<08:27, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 420/3000 [01:29<07:50, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 421/3000 [01:29<08:05, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 422/3000 [01:29<09:51, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 425/3000 [01:30<09:38, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 427/3000 [01:30<08:40, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 429/3000 [01:31<08:41, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 430/3000 [01:31<08:33, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 432/3000 [01:31<07:51, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 433/3000 [01:32<09:10, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 435/3000 [01:32<10:04, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 437/3000 [01:33<13:55, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 440/3000 [01:33<07:50, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 443/3000 [01:34<06:48, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 445/3000 [01:34<06:17, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 446/3000 [01:34<10:16, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 447/3000 [01:35<12:55, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 450/3000 [01:36<10:34, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 453/3000 [01:36<06:35, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 455/3000 [01:36<05:16, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 457/3000 [01:36<06:12, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 459/3000 [01:37<08:07, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 460/3000 [01:37<07:54, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 461/3000 [01:37<08:45, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 462/3000 [01:38<08:43, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 463/3000 [01:38<11:08, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 466/3000 [01:38<08:06, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 467/3000 [01:39<10:26, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 470/3000 [01:39<08:08, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 471/3000 [01:40<11:28, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 476/3000 [01:40<06:04, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 479/3000 [01:41<07:17, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 483/3000 [01:42<07:14, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [01:42<10:44, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 488/3000 [01:43<08:32, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 490/3000 [01:43<08:22, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 491/3000 [01:43<08:52, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 493/3000 [01:44<08:50, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 494/3000 [01:44<10:40, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 496/3000 [01:45<12:31, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 499/3000 [01:45<08:04, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 501/3000 [01:46<08:12, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 502/3000 [01:46<07:41, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 504/3000 [01:46<06:42, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 508/3000 [01:47<05:52, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 509/3000 [01:47<09:19, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [01:48<08:33, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 513/3000 [01:48<09:15, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 514/3000 [01:49<11:41, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 515/3000 [01:49<11:22, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 517/3000 [01:49<10:18, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 518/3000 [01:49<09:09, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 520/3000 [01:50<07:20, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 523/3000 [01:50<06:26, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 524/3000 [01:50<06:51, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 527/3000 [01:51<05:46, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 530/3000 [01:51<06:25, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 531/3000 [01:51<06:12, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 533/3000 [01:52<05:43, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [01:52<11:51, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 537/3000 [01:53<08:41, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 538/3000 [01:53<08:00, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 541/3000 [01:54<07:51, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 544/3000 [01:54<07:08, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 546/3000 [01:54<07:04, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 547/3000 [01:55<08:31, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [01:55<08:38, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 550/3000 [01:55<09:15, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 552/3000 [01:56<07:51, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 553/3000 [01:56<08:12, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 555/3000 [01:57<08:51, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 557/3000 [01:57<07:22, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 559/3000 [01:57<05:47, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [01:57<05:37, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 563/3000 [01:58<05:51, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 564/3000 [01:58<07:37, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 566/3000 [01:58<06:45, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 570/3000 [01:59<06:55, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 571/3000 [01:59<06:29, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 572/3000 [01:59<08:09, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 575/3000 [02:00<08:38, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 576/3000 [02:00<08:26, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 577/3000 [02:01<08:46, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 579/3000 [02:01<07:45, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 581/3000 [02:01<07:02, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 583/3000 [02:02<06:52, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [02:02<07:16, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 588/3000 [02:02<05:58, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 590/3000 [02:03<06:47, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 591/3000 [02:04<11:11, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 593/3000 [02:04<11:24, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 595/3000 [02:05<09:39, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 599/3000 [02:05<06:00, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 601/3000 [02:05<06:22, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 603/3000 [02:06<06:46, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [02:06<08:00, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 606/3000 [02:07<09:46, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 608/3000 [02:07<09:27, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 611/3000 [02:08<08:45, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 612/3000 [02:08<10:32, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 616/3000 [02:09<06:53, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 618/3000 [02:09<06:40, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 620/3000 [02:09<05:39, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 621/3000 [02:10<07:52, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 622/3000 [02:10<10:11, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 624/3000 [02:10<07:55, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 626/3000 [02:10<07:07, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 629/3000 [02:11<07:45, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 632/3000 [02:12<09:02, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 635/3000 [02:12<07:10, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 636/3000 [02:13<08:18, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 638/3000 [02:13<07:17, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 639/3000 [02:13<07:37, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 640/3000 [02:14<09:02, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 642/3000 [02:14<09:29, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 644/3000 [02:14<06:23, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 647/3000 [02:15<07:24, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 651/3000 [02:16<06:06, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 652/3000 [02:16<10:27, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 655/3000 [02:17<08:39, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 657/3000 [02:17<07:26, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 661/3000 [02:18<06:40, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 666/3000 [02:18<04:37, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 668/3000 [02:19<04:13, 9.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 670/3000 [02:19<04:27, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 674/3000 [02:21<09:43, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▎ | 675/3000 [02:21<09:16, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 677/3000 [02:21<07:50, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 679/3000 [02:22<07:56, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 680/3000 [02:22<08:04, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [02:22<08:26, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 685/3000 [02:23<07:38, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [02:23<09:04, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 689/3000 [02:24<07:22, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 690/3000 [02:24<07:57, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 693/3000 [02:24<07:26, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 695/3000 [02:25<05:50, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 696/3000 [02:25<10:11, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 699/3000 [02:26<07:49, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 701/3000 [02:26<06:27, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 702/3000 [02:26<08:05, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 704/3000 [02:27<09:49, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 706/3000 [02:27<06:51, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 707/3000 [02:28<08:07, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 711/3000 [02:28<06:23, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 712/3000 [02:29<08:00, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 714/3000 [02:29<08:17, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 715/3000 [02:30<11:49, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 719/3000 [02:30<06:37, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 722/3000 [02:30<05:05, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 724/3000 [02:30<04:14, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 726/3000 [02:31<06:49, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 728/3000 [02:32<07:41, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 732/3000 [02:32<06:11, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 733/3000 [02:33<11:01, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 734/3000 [02:33<10:25, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 735/3000 [02:34<10:51, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 738/3000 [02:34<06:40, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 741/3000 [02:34<06:57, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 744/3000 [02:35<06:21, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 745/3000 [02:35<08:30, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 749/3000 [02:36<05:50, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 750/3000 [02:36<08:06, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 753/3000 [02:37<09:26, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 755/3000 [02:37<07:09, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 756/3000 [02:38<07:51, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 758/3000 [02:38<07:47, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 759/3000 [02:38<08:06, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 760/3000 [02:39<08:21, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 762/3000 [02:39<08:32, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 764/3000 [02:40<08:19, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 766/3000 [02:40<07:02, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 767/3000 [02:40<06:04, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 768/3000 [02:40<07:06, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 770/3000 [02:41<07:27, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 772/3000 [02:42<10:48, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 777/3000 [02:42<04:41, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 779/3000 [02:43<07:56, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 780/3000 [02:43<07:13, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 781/3000 [02:43<07:48, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 784/3000 [02:44<06:51, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 785/3000 [02:44<09:36, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 788/3000 [02:45<08:11, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 789/3000 [02:45<09:26, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [02:45<07:11, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 795/3000 [02:46<07:54, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 796/3000 [02:47<10:45, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 799/3000 [02:47<07:53, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [02:48<09:15, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 801/3000 [02:48<12:12, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 807/3000 [02:49<06:00, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 808/3000 [02:49<06:45, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 811/3000 [02:49<05:54, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 813/3000 [02:50<06:59, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 814/3000 [02:50<07:58, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 815/3000 [02:51<08:19, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 816/3000 [02:51<10:07, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 818/3000 [02:51<08:57, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 823/3000 [02:52<04:55, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 827/3000 [02:53<06:58, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 830/3000 [02:53<05:08, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 832/3000 [02:54<07:33, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [02:54<07:38, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 836/3000 [02:55<06:45, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 838/3000 [02:55<08:32, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 840/3000 [02:56<06:41, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 843/3000 [02:56<04:08, 8.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 846/3000 [02:57<07:42, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 848/3000 [02:57<07:58, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 850/3000 [02:57<06:31, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 852/3000 [02:58<05:42, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 853/3000 [02:59<12:02, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 854/3000 [02:59<11:30, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 856/3000 [03:00<10:08, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 860/3000 [03:00<06:18, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 863/3000 [03:01<06:38, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 865/3000 [03:01<05:04, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 867/3000 [03:01<05:17, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 868/3000 [03:01<07:43, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 870/3000 [03:02<08:33, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 872/3000 [03:02<07:50, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 873/3000 [03:03<07:59, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 875/3000 [03:03<09:23, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 877/3000 [03:04<07:33, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 879/3000 [03:04<06:50, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [03:04<05:52, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 884/3000 [03:05<06:19, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 889/3000 [03:05<03:21, 10.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [03:06<06:18, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 893/3000 [03:06<06:12, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 894/3000 [03:07<06:53, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 895/3000 [03:07<09:37, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 899/3000 [03:08<07:21, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 900/3000 [03:08<07:11, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 901/3000 [03:08<09:01, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 902/3000 [03:09<09:21, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 905/3000 [03:09<05:48, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 906/3000 [03:09<06:22, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 909/3000 [03:09<04:32, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 910/3000 [03:10<05:31, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 913/3000 [03:11<08:31, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 915/3000 [03:12<10:04, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 916/3000 [03:12<10:42, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 919/3000 [03:12<07:15, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 921/3000 [03:13<06:06, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 924/3000 [03:13<05:08, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 925/3000 [03:13<06:09, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 928/3000 [03:14<05:54, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 929/3000 [03:14<06:44, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 932/3000 [03:15<09:50, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 934/3000 [03:16<07:24, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 938/3000 [03:16<04:51, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 939/3000 [03:16<04:36, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 940/3000 [03:16<05:27, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 943/3000 [03:17<05:08, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 946/3000 [03:17<05:48, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 947/3000 [03:17<05:39, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 948/3000 [03:18<07:38, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 951/3000 [03:19<07:05, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 952/3000 [03:19<08:42, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 953/3000 [03:19<08:18, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 954/3000 [03:19<08:04, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 955/3000 [03:20<07:51, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 957/3000 [03:20<06:05, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 960/3000 [03:20<06:19, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 961/3000 [03:21<06:39, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 966/3000 [03:22<06:16, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 968/3000 [03:22<07:31, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 971/3000 [03:23<07:37, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▎ | 975/3000 [03:23<04:39, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 980/3000 [03:24<04:09, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 982/3000 [03:25<06:19, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 984/3000 [03:25<08:20, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 987/3000 [03:26<06:34, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 989/3000 [03:26<06:39, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 992/3000 [03:27<05:21, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 993/3000 [03:27<07:27, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 995/3000 [03:27<06:57, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 996/3000 [03:28<06:45, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 999/3000 [03:28<05:27, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1000/3000 [03:29<08:57, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1002/3000 [03:29<09:11, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1006/3000 [03:30<06:33, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1008/3000 [03:30<05:36, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1011/3000 [03:31<04:16, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1013/3000 [03:31<05:31, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1014/3000 [03:31<07:01, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1016/3000 [03:32<06:59, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1017/3000 [03:32<07:16, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [03:33<10:08, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1020/3000 [03:33<09:37, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1024/3000 [03:34<05:28, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1026/3000 [03:34<06:00, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1027/3000 [03:34<06:10, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1029/3000 [03:35<05:12, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1031/3000 [03:35<05:18, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1033/3000 [03:36<07:05, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1036/3000 [03:36<05:07, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1038/3000 [03:36<06:33, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1039/3000 [03:37<06:12, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1042/3000 [03:37<05:03, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1043/3000 [03:38<08:07, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1045/3000 [03:38<07:25, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1048/3000 [03:38<05:02, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1050/3000 [03:39<07:56, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1051/3000 [03:39<07:57, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1054/3000 [03:40<09:30, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1059/3000 [03:41<05:24, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1060/3000 [03:41<05:52, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1063/3000 [03:41<04:57, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1064/3000 [03:42<06:29, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1065/3000 [03:42<08:02, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1066/3000 [03:43<09:01, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1069/3000 [03:43<07:23, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1072/3000 [03:44<07:35, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1074/3000 [03:44<06:55, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1077/3000 [03:45<06:34, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1079/3000 [03:45<05:46, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1081/3000 [03:46<05:48, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1084/3000 [03:46<05:16, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1086/3000 [03:47<07:07, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1087/3000 [03:47<07:26, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1090/3000 [03:48<05:49, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1093/3000 [03:48<05:52, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1094/3000 [03:49<05:40, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1097/3000 [03:50<07:57, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1098/3000 [03:50<07:42, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [03:50<06:29, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1102/3000 [03:50<06:01, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1105/3000 [03:51<04:44, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1108/3000 [03:51<04:50, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1109/3000 [03:52<06:16, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1113/3000 [03:52<04:38, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1114/3000 [03:52<05:53, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1115/3000 [03:53<11:17, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1119/3000 [03:54<05:42, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1123/3000 [03:54<04:25, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1127/3000 [03:55<04:01, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1129/3000 [03:55<05:08, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1132/3000 [03:56<04:55, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1134/3000 [03:56<06:56, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1135/3000 [03:57<09:38, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1137/3000 [03:58<12:28, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1141/3000 [03:59<06:16, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1144/3000 [03:59<04:15, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1146/3000 [03:59<04:02, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1148/3000 [04:00<04:41, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1150/3000 [04:00<05:22, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1152/3000 [04:00<04:07, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1154/3000 [04:01<07:39, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1155/3000 [04:02<10:11, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1159/3000 [04:02<06:22, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1160/3000 [04:02<06:36, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1162/3000 [04:03<06:10, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1166/3000 [04:03<05:13, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1167/3000 [04:04<05:58, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [04:04<04:15, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1174/3000 [04:05<03:58, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1175/3000 [04:05<07:14, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1177/3000 [04:06<07:22, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1179/3000 [04:06<05:11, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1180/3000 [04:07<09:03, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1182/3000 [04:07<07:03, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1185/3000 [04:07<05:06, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1189/3000 [04:08<04:02, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1191/3000 [04:08<05:32, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1193/3000 [04:09<06:48, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1194/3000 [04:09<06:44, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1197/3000 [04:10<07:13, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1199/3000 [04:10<06:22, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1202/3000 [04:11<06:34, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1203/3000 [04:12<07:44, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [04:12<06:42, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1209/3000 [04:12<04:59, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1211/3000 [04:13<04:50, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1212/3000 [04:13<05:07, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1213/3000 [04:14<07:55, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1216/3000 [04:15<09:48, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1218/3000 [04:15<08:42, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1220/3000 [04:16<07:49, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1221/3000 [04:16<08:44, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [04:16<09:29, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1225/3000 [04:17<07:04, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1229/3000 [04:17<04:29, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1230/3000 [04:18<04:32, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1233/3000 [04:18<04:22, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1234/3000 [04:18<04:08, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1236/3000 [04:19<07:56, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1237/3000 [04:19<07:44, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1239/3000 [04:20<09:02, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1241/3000 [04:20<07:28, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1245/3000 [04:21<04:08, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1247/3000 [04:21<05:34, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1251/3000 [04:22<05:07, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1255/3000 [04:23<04:14, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1257/3000 [04:23<06:45, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1258/3000 [04:24<06:48, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1259/3000 [04:24<10:34, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1261/3000 [04:25<07:42, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1264/3000 [04:25<06:13, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1265/3000 [04:25<05:44, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1268/3000 [04:26<04:07, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1272/3000 [04:26<03:46, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1273/3000 [04:26<04:02, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▎ | 1275/3000 [04:27<04:42, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1276/3000 [04:27<05:19, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1277/3000 [04:27<05:35, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1278/3000 [04:28<13:14, 2.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1280/3000 [04:29<10:53, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1282/3000 [04:29<08:23, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1284/3000 [04:30<07:32, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1287/3000 [04:30<06:11, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1289/3000 [04:31<05:06, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1292/3000 [04:31<04:39, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1293/3000 [04:32<06:35, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1295/3000 [04:32<05:39, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1296/3000 [04:33<08:45, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1300/3000 [04:33<06:25, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1301/3000 [04:34<06:33, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1303/3000 [04:34<06:34, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1304/3000 [04:34<05:41, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1307/3000 [04:35<05:08, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1308/3000 [04:35<06:14, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1311/3000 [04:36<05:28, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1313/3000 [04:36<07:25, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1314/3000 [04:37<08:48, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1316/3000 [04:37<07:12, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1319/3000 [04:38<05:54, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1323/3000 [04:38<03:55, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1324/3000 [04:38<04:40, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1325/3000 [04:39<05:50, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1328/3000 [04:39<05:04, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1330/3000 [04:40<05:22, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1332/3000 [04:40<04:27, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1333/3000 [04:41<09:04, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1336/3000 [04:42<09:25, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1339/3000 [04:42<06:49, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1341/3000 [04:42<05:02, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1344/3000 [04:43<04:37, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1345/3000 [04:43<04:15, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1346/3000 [04:43<05:19, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1351/3000 [04:44<03:02, 9.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1353/3000 [04:45<08:10, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1356/3000 [04:46<06:47, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1359/3000 [04:46<06:42, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [04:47<04:48, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1364/3000 [04:47<04:30, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1366/3000 [04:47<04:27, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1369/3000 [04:47<02:47, 9.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1371/3000 [04:49<06:39, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1373/3000 [04:49<07:19, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1374/3000 [04:49<06:50, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1376/3000 [04:50<06:27, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1377/3000 [04:50<05:42, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1379/3000 [04:51<06:12, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1381/3000 [04:51<04:51, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1384/3000 [04:51<03:31, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1388/3000 [04:51<03:05, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [04:52<05:54, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1392/3000 [04:53<06:51, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1394/3000 [04:54<06:51, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1397/3000 [04:54<04:35, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1399/3000 [04:54<04:13, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [04:54<03:10, 8.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [04:54<03:14, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1405/3000 [04:55<05:23, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1406/3000 [04:56<06:10, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1407/3000 [04:56<06:15, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1408/3000 [04:56<07:37, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1411/3000 [04:57<06:16, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1415/3000 [04:58<05:41, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1416/3000 [04:58<05:25, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1417/3000 [04:58<06:07, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1423/3000 [04:59<03:48, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1425/3000 [05:00<05:51, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1428/3000 [05:00<06:13, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1430/3000 [05:01<05:56, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1433/3000 [05:01<04:49, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1434/3000 [05:01<04:51, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1437/3000 [05:02<04:55, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1439/3000 [05:02<05:10, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1441/3000 [05:03<04:55, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1443/3000 [05:03<03:41, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1444/3000 [05:03<04:02, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1445/3000 [05:03<04:36, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1446/3000 [05:04<05:08, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1448/3000 [05:04<05:12, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1450/3000 [05:05<06:13, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1452/3000 [05:05<05:31, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1453/3000 [05:05<05:40, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1455/3000 [05:06<05:11, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1456/3000 [05:06<06:30, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1460/3000 [05:07<04:13, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1461/3000 [05:07<05:00, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1462/3000 [05:07<05:04, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1465/3000 [05:08<06:19, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [05:08<06:09, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1467/3000 [05:09<06:09, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1471/3000 [05:09<04:41, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1473/3000 [05:09<03:42, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1474/3000 [05:10<04:13, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1476/3000 [05:10<04:35, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1478/3000 [05:10<04:46, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1481/3000 [05:11<05:59, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1483/3000 [05:12<07:42, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1485/3000 [05:12<05:34, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [05:13<05:50, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1488/3000 [05:13<04:34, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1489/3000 [05:13<04:43, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1491/3000 [05:14<05:21, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1494/3000 [05:14<03:44, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1495/3000 [05:14<04:41, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1497/3000 [05:15<06:20, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1498/3000 [05:15<06:04, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1499/3000 [05:16<07:44, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1501/3000 [05:16<05:53, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1503/3000 [05:17<06:13, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1505/3000 [05:17<04:38, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1506/3000 [05:17<06:26, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1508/3000 [05:17<05:02, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1511/3000 [05:18<04:29, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1513/3000 [05:18<04:03, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1516/3000 [05:19<04:13, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1517/3000 [05:19<05:59, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1519/3000 [05:20<05:19, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1520/3000 [05:20<06:43, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1522/3000 [05:21<06:26, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1524/3000 [05:21<04:23, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1526/3000 [05:21<04:32, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1527/3000 [05:21<04:28, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1528/3000 [05:22<04:37, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1530/3000 [05:22<04:16, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1531/3000 [05:23<07:35, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1536/3000 [05:23<04:37, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1538/3000 [05:24<06:30, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1542/3000 [05:24<03:31, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1544/3000 [05:25<04:40, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1547/3000 [05:26<04:03, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1549/3000 [05:26<03:16, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [05:26<04:25, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1552/3000 [05:27<05:35, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1554/3000 [05:27<05:00, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1555/3000 [05:27<04:33, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1556/3000 [05:28<05:52, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1557/3000 [05:28<05:45, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1560/3000 [05:28<04:49, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1562/3000 [05:29<04:17, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1565/3000 [05:29<04:04, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1567/3000 [05:30<03:57, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1569/3000 [05:30<03:27, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1570/3000 [05:30<03:51, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1571/3000 [05:30<04:05, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1573/3000 [05:31<05:20, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1574/3000 [05:31<04:48, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1576/3000 [05:32<06:06, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1577/3000 [05:32<06:04, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1580/3000 [05:32<04:22, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1581/3000 [05:33<04:52, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1584/3000 [05:33<03:57, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1585/3000 [05:33<05:08, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1586/3000 [05:34<05:40, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1589/3000 [05:34<05:09, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1592/3000 [05:35<04:31, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1595/3000 [05:36<05:41, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1596/3000 [05:36<05:38, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1598/3000 [05:37<05:08, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [05:37<05:29, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1602/3000 [05:37<03:58, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1604/3000 [05:38<05:56, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1606/3000 [05:38<04:26, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1609/3000 [05:39<03:17, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1610/3000 [05:39<04:44, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1611/3000 [05:39<05:45, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1612/3000 [05:40<05:36, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1617/3000 [05:40<03:05, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1618/3000 [05:41<06:26, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [05:41<06:04, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1621/3000 [05:42<07:18, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1625/3000 [05:42<04:26, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1629/3000 [05:43<02:37, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1631/3000 [05:43<04:05, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1633/3000 [05:44<04:14, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1636/3000 [05:44<02:58, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [05:45<06:33, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1640/3000 [05:46<06:09, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1644/3000 [05:46<03:46, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1646/3000 [05:47<04:53, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1648/3000 [05:47<04:31, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1650/3000 [05:48<04:48, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1654/3000 [05:48<03:32, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1656/3000 [05:48<03:45, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1657/3000 [05:49<04:30, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1658/3000 [05:50<08:46, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1660/3000 [05:50<06:40, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1664/3000 [05:51<04:58, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1666/3000 [05:51<03:55, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1667/3000 [05:52<04:23, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1670/3000 [05:52<04:25, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1674/3000 [05:52<02:32, 8.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1676/3000 [05:53<03:24, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1677/3000 [05:53<04:07, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1678/3000 [05:54<04:39, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [05:54<07:37, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1684/3000 [05:55<04:03, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1685/3000 [05:55<03:45, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1686/3000 [05:55<04:13, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1688/3000 [05:56<04:10, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1690/3000 [05:56<03:55, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1692/3000 [05:56<04:08, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1695/3000 [05:57<03:22, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1696/3000 [05:57<05:14, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1698/3000 [05:58<05:05, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1700/3000 [05:58<04:49, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1702/3000 [05:59<05:23, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1705/3000 [05:59<03:23, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1707/3000 [06:00<03:45, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1709/3000 [06:00<04:28, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1712/3000 [06:01<05:05, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1714/3000 [06:01<05:19, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1715/3000 [06:02<04:32, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1716/3000 [06:02<04:41, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1717/3000 [06:02<06:22, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1720/3000 [06:03<04:24, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1723/3000 [06:03<04:09, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▊ | 1725/3000 [06:04<03:11, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1729/3000 [06:04<02:42, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1730/3000 [06:04<02:50, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1732/3000 [06:05<04:18, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1734/3000 [06:05<03:23, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1735/3000 [06:05<04:03, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1736/3000 [06:06<04:28, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1738/3000 [06:06<05:36, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1740/3000 [06:07<03:53, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1742/3000 [06:07<03:59, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1744/3000 [06:07<03:02, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1748/3000 [06:08<03:56, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1752/3000 [06:09<03:24, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1753/3000 [06:09<04:09, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1755/3000 [06:10<05:17, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1756/3000 [06:10<06:04, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1757/3000 [06:11<06:31, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1761/3000 [06:11<03:50, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1764/3000 [06:11<02:57, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1767/3000 [06:12<03:12, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1769/3000 [06:12<02:35, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1771/3000 [06:13<04:06, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1773/3000 [06:13<04:06, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1776/3000 [06:14<03:33, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1777/3000 [06:14<04:34, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1779/3000 [06:15<05:33, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1781/3000 [06:15<04:48, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1783/3000 [06:15<03:29, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1785/3000 [06:16<03:12, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1786/3000 [06:16<04:12, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1790/3000 [06:17<03:08, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1792/3000 [06:17<02:45, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1794/3000 [06:17<03:06, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1796/3000 [06:18<03:53, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1799/3000 [06:19<04:41, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1802/3000 [06:19<04:27, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1805/3000 [06:19<02:42, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1809/3000 [06:20<02:32, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1811/3000 [06:20<03:12, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1812/3000 [06:21<03:27, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [06:21<03:36, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1815/3000 [06:21<03:46, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1816/3000 [06:22<04:57, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1817/3000 [06:22<06:33, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1818/3000 [06:23<06:02, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1821/3000 [06:23<04:00, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1824/3000 [06:23<02:37, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1825/3000 [06:23<02:57, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1827/3000 [06:24<03:16, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1831/3000 [06:24<02:30, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1835/3000 [06:25<02:57, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1837/3000 [06:26<04:44, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1838/3000 [06:26<04:46, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1839/3000 [06:26<04:39, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [06:27<03:34, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1843/3000 [06:27<04:14, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1844/3000 [06:28<05:07, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1849/3000 [06:28<03:11, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1851/3000 [06:28<02:45, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1852/3000 [06:29<02:39, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1853/3000 [06:29<03:14, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1855/3000 [06:29<04:00, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1857/3000 [06:30<03:24, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1858/3000 [06:30<03:17, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1860/3000 [06:31<05:01, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1862/3000 [06:31<05:10, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1868/3000 [06:32<02:21, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1871/3000 [06:32<03:01, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1872/3000 [06:33<03:01, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1874/3000 [06:33<03:51, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▎ | 1875/3000 [06:34<05:30, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1876/3000 [06:34<06:10, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1879/3000 [06:35<04:36, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1881/3000 [06:35<03:20, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1883/3000 [06:36<04:07, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1884/3000 [06:36<03:45, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1888/3000 [06:36<02:30, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1894/3000 [06:37<01:59, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1896/3000 [06:38<03:37, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1897/3000 [06:39<05:59, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [06:39<05:47, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1902/3000 [06:39<03:26, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1903/3000 [06:40<04:53, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1907/3000 [06:40<02:58, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1909/3000 [06:41<03:09, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1912/3000 [06:41<02:03, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1915/3000 [06:41<03:03, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1916/3000 [06:42<03:18, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1917/3000 [06:42<04:48, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1919/3000 [06:43<05:43, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1921/3000 [06:43<03:41, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1923/3000 [06:44<03:27, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1924/3000 [06:44<03:52, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1928/3000 [06:44<02:30, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1932/3000 [06:44<01:45, 10.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1934/3000 [06:45<02:52, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1936/3000 [06:45<03:14, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1938/3000 [06:46<03:48, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1939/3000 [06:46<03:44, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1940/3000 [06:46<03:44, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1944/3000 [06:47<02:47, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [06:47<02:15, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1949/3000 [06:48<02:19, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1951/3000 [06:48<02:31, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1952/3000 [06:48<02:59, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1953/3000 [06:49<03:55, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1954/3000 [06:49<03:54, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1956/3000 [06:49<03:07, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1958/3000 [06:50<04:10, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1961/3000 [06:50<02:35, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1963/3000 [06:50<02:24, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1966/3000 [06:51<01:58, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1967/3000 [06:51<02:38, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1969/3000 [06:51<02:29, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1973/3000 [06:53<04:30, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1975/3000 [06:53<03:51, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1977/3000 [06:54<03:36, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1979/3000 [06:54<02:56, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1982/3000 [06:54<02:08, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1983/3000 [06:54<02:20, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1986/3000 [06:55<02:10, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1989/3000 [06:55<03:12, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1990/3000 [06:56<04:34, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1991/3000 [06:56<05:07, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1992/3000 [06:57<05:40, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1995/3000 [06:57<03:58, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1997/3000 [06:58<03:08, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2004/3000 [06:58<01:19, 12.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2006/3000 [06:58<01:42, 9.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2008/3000 [06:59<02:42, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2010/3000 [07:00<04:32, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2012/3000 [07:00<03:56, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2014/3000 [07:01<03:07, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2016/3000 [07:01<02:21, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2019/3000 [07:02<02:49, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [07:02<02:10, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2024/3000 [07:02<01:53, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2026/3000 [07:02<02:32, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2027/3000 [07:03<03:04, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2029/3000 [07:03<03:46, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2030/3000 [07:04<04:10, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2033/3000 [07:04<03:17, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2034/3000 [07:05<03:28, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [07:05<02:50, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2038/3000 [07:05<03:30, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [07:06<03:42, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2041/3000 [07:06<03:40, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2045/3000 [07:06<02:05, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2047/3000 [07:07<02:19, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2048/3000 [07:07<02:44, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2049/3000 [07:07<03:36, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2052/3000 [07:08<02:27, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2053/3000 [07:08<04:01, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2054/3000 [07:09<04:07, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2056/3000 [07:09<04:26, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2060/3000 [07:09<02:05, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2063/3000 [07:10<02:00, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2065/3000 [07:10<01:56, 8.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2067/3000 [07:11<02:24, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2069/3000 [07:11<02:42, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2072/3000 [07:11<02:28, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2074/3000 [07:13<04:27, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2076/3000 [07:13<04:46, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2080/3000 [07:14<02:57, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2081/3000 [07:14<03:00, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2082/3000 [07:14<03:04, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2085/3000 [07:15<02:46, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2087/3000 [07:15<02:35, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2088/3000 [07:15<03:15, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2089/3000 [07:15<03:23, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2092/3000 [07:16<02:14, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2094/3000 [07:16<03:20, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2096/3000 [07:17<02:41, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2099/3000 [07:17<02:31, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2102/3000 [07:18<02:36, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2103/3000 [07:18<02:31, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2106/3000 [07:19<03:32, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2107/3000 [07:19<03:15, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2108/3000 [07:20<04:20, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2110/3000 [07:20<03:53, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2112/3000 [07:20<03:16, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2116/3000 [07:21<02:13, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2118/3000 [07:21<02:00, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2119/3000 [07:22<03:21, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2120/3000 [07:22<03:28, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2122/3000 [07:22<03:11, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2124/3000 [07:23<03:37, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2127/3000 [07:24<03:15, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2128/3000 [07:24<02:55, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2131/3000 [07:24<02:24, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2135/3000 [07:25<01:50, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2137/3000 [07:25<02:37, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2138/3000 [07:25<02:55, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2142/3000 [07:26<02:08, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2143/3000 [07:26<02:49, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2146/3000 [07:27<02:56, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2148/3000 [07:27<02:45, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2150/3000 [07:28<02:45, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2151/3000 [07:28<02:35, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2154/3000 [07:28<02:09, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2155/3000 [07:29<04:00, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2156/3000 [07:29<04:10, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2159/3000 [07:29<02:34, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2160/3000 [07:30<02:54, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2163/3000 [07:30<02:31, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2165/3000 [07:31<03:01, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2166/3000 [07:31<03:37, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2167/3000 [07:31<03:29, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2169/3000 [07:32<04:18, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2172/3000 [07:33<02:32, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▎ | 2175/3000 [07:33<02:12, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2178/3000 [07:33<01:44, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2179/3000 [07:34<02:02, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2180/3000 [07:34<02:55, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2183/3000 [07:34<02:06, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2184/3000 [07:35<02:30, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2185/3000 [07:35<03:55, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2187/3000 [07:36<04:08, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2189/3000 [07:36<03:18, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2191/3000 [07:37<02:59, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2194/3000 [07:37<01:47, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2197/3000 [07:37<02:03, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [07:38<02:28, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2200/3000 [07:38<02:20, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [07:38<02:28, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2203/3000 [07:39<02:47, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2206/3000 [07:39<02:09, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2207/3000 [07:40<03:42, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2209/3000 [07:40<03:10, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2211/3000 [07:40<02:37, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2213/3000 [07:41<02:22, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2216/3000 [07:41<02:35, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2219/3000 [07:42<01:56, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2220/3000 [07:42<01:53, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2221/3000 [07:42<02:39, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2222/3000 [07:43<03:39, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2225/3000 [07:43<02:19, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2227/3000 [07:44<02:59, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2228/3000 [07:44<03:04, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2230/3000 [07:44<02:55, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2233/3000 [07:44<01:41, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2235/3000 [07:45<01:53, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2237/3000 [07:45<02:14, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2238/3000 [07:45<02:18, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2239/3000 [07:46<02:51, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2241/3000 [07:46<03:17, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2242/3000 [07:47<03:08, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2244/3000 [07:47<03:40, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2245/3000 [07:48<04:01, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2247/3000 [07:48<03:11, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2250/3000 [07:48<01:55, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2251/3000 [07:49<02:42, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2255/3000 [07:49<02:02, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2257/3000 [07:49<01:34, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2259/3000 [07:50<02:19, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [07:50<02:22, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2264/3000 [07:51<02:01, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2265/3000 [07:51<02:43, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2266/3000 [07:52<03:05, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2269/3000 [07:52<02:50, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2274/3000 [07:53<01:43, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2276/3000 [07:53<01:32, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2279/3000 [07:54<01:56, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2280/3000 [07:54<02:10, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2282/3000 [07:54<02:29, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2283/3000 [07:54<02:11, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2284/3000 [07:55<04:26, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2288/3000 [07:56<02:20, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2289/3000 [07:56<02:30, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2296/3000 [07:57<01:16, 9.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2298/3000 [07:57<01:08, 10.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2300/3000 [07:58<02:44, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2302/3000 [07:58<02:48, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2304/3000 [07:59<02:28, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2306/3000 [07:59<02:33, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2307/3000 [08:00<02:44, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2309/3000 [08:00<02:34, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [08:00<01:33, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2316/3000 [08:01<01:32, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2317/3000 [08:01<01:30, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2318/3000 [08:01<01:48, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2319/3000 [08:02<02:46, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2320/3000 [08:02<03:08, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2322/3000 [08:02<02:39, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2325/3000 [08:03<02:16, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2328/3000 [08:03<02:14, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2329/3000 [08:04<02:41, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2334/3000 [08:04<01:45, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2337/3000 [08:05<01:31, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2338/3000 [08:05<01:38, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2339/3000 [08:05<01:56, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2340/3000 [08:06<02:14, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2344/3000 [08:06<01:59, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2345/3000 [08:07<02:05, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2348/3000 [08:07<02:33, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2350/3000 [08:08<02:29, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2353/3000 [08:08<01:49, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2355/3000 [08:09<02:02, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2361/3000 [08:09<01:12, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2363/3000 [08:10<01:26, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2365/3000 [08:11<02:28, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2367/3000 [08:11<01:59, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2372/3000 [08:11<01:35, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2378/3000 [08:13<01:38, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2380/3000 [08:13<01:37, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2384/3000 [08:14<02:18, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2387/3000 [08:15<02:00, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2388/3000 [08:15<01:52, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2390/3000 [08:15<01:41, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2392/3000 [08:16<02:07, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2396/3000 [08:17<01:40, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2398/3000 [08:17<01:21, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2400/3000 [08:18<02:49, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2403/3000 [08:18<02:12, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2405/3000 [08:19<01:50, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [08:19<01:35, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2410/3000 [08:19<01:33, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2413/3000 [08:20<01:24, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2414/3000 [08:20<01:49, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2417/3000 [08:21<01:48, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [08:21<01:58, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2420/3000 [08:21<01:53, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [08:22<01:43, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2424/3000 [08:22<01:55, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2428/3000 [08:23<01:26, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2430/3000 [08:23<01:28, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2431/3000 [08:23<01:34, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2432/3000 [08:24<02:01, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2433/3000 [08:24<02:09, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2434/3000 [08:24<02:25, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2436/3000 [08:25<01:52, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2437/3000 [08:25<02:03, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2438/3000 [08:25<02:04, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2439/3000 [08:25<02:24, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2440/3000 [08:26<02:18, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2442/3000 [08:26<01:54, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2445/3000 [08:26<01:32, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2447/3000 [08:27<01:15, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2450/3000 [08:27<01:11, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2452/3000 [08:27<01:28, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2453/3000 [08:28<01:45, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2454/3000 [08:28<01:47, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2456/3000 [08:28<01:30, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2458/3000 [08:29<02:06, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2461/3000 [08:29<01:19, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2462/3000 [08:29<01:28, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2465/3000 [08:30<01:34, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2467/3000 [08:30<01:17, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2468/3000 [08:30<01:36, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2469/3000 [08:31<01:53, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2471/3000 [08:31<01:57, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2473/3000 [08:32<01:50, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2474/3000 [08:32<01:38, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2476/3000 [08:32<01:45, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2480/3000 [08:33<01:43, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2482/3000 [08:33<01:27, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2485/3000 [08:34<01:19, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2486/3000 [08:34<01:50, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [08:34<01:28, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2489/3000 [08:35<02:13, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2492/3000 [08:35<01:34, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2494/3000 [08:36<02:22, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2501/3000 [08:37<01:04, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2506/3000 [08:38<01:13, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2508/3000 [08:38<01:32, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2510/3000 [08:39<01:46, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2512/3000 [08:39<01:24, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2514/3000 [08:39<01:33, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2516/3000 [08:40<01:40, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [08:40<01:33, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2520/3000 [08:41<01:32, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2522/3000 [08:41<01:24, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2524/3000 [08:41<01:15, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [08:42<01:28, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2527/3000 [08:42<01:14, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2528/3000 [08:42<01:24, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2531/3000 [08:42<01:20, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2532/3000 [08:43<01:13, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2534/3000 [08:43<01:37, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2536/3000 [08:44<01:24, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2537/3000 [08:44<01:28, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2540/3000 [08:44<01:07, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2543/3000 [08:45<01:56, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2546/3000 [08:45<01:10, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2548/3000 [08:46<01:03, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2550/3000 [08:46<01:12, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2552/3000 [08:47<01:50, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2555/3000 [08:47<01:06, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2557/3000 [08:48<01:21, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2561/3000 [08:48<01:17, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2567/3000 [08:49<00:56, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2569/3000 [08:50<01:42, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [08:50<01:37, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2572/3000 [08:51<01:28, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2575/3000 [08:52<01:36, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2576/3000 [08:52<01:39, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2577/3000 [08:52<01:36, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [08:52<01:22, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2581/3000 [08:53<01:12, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2583/3000 [08:53<01:12, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2585/3000 [08:53<00:57, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2587/3000 [08:53<01:01, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2588/3000 [08:54<02:05, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2593/3000 [08:55<01:00, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2595/3000 [08:55<00:54, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2597/3000 [08:56<01:12, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2599/3000 [08:56<01:10, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2600/3000 [08:56<01:04, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2601/3000 [08:57<01:45, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2603/3000 [08:57<01:49, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2604/3000 [08:57<01:33, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2606/3000 [08:58<01:15, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2608/3000 [08:58<01:26, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2612/3000 [08:59<00:58, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2614/3000 [08:59<00:48, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2616/3000 [08:59<00:49, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2617/3000 [09:00<01:13, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2619/3000 [09:00<01:22, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2620/3000 [09:00<01:21, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2622/3000 [09:01<01:57, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2626/3000 [09:01<00:58, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2628/3000 [09:02<01:03, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2630/3000 [09:02<01:00, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2631/3000 [09:03<01:11, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2635/3000 [09:03<00:50, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2636/3000 [09:03<00:52, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2639/3000 [09:04<00:55, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2640/3000 [09:04<01:01, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2641/3000 [09:05<02:14, 2.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2642/3000 [09:05<02:03, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2643/3000 [09:05<02:03, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2644/3000 [09:06<02:11, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2648/3000 [09:06<01:01, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2649/3000 [09:06<01:11, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2652/3000 [09:07<00:55, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2654/3000 [09:07<00:51, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2656/3000 [09:07<00:51, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [09:08<00:45, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2659/3000 [09:08<00:46, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2661/3000 [09:08<01:03, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [09:09<01:10, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2663/3000 [09:09<01:34, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2665/3000 [09:09<01:11, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2666/3000 [09:10<01:16, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2669/3000 [09:10<00:57, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2672/3000 [09:11<01:06, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2676/3000 [09:11<00:47, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2677/3000 [09:11<00:56, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2679/3000 [09:12<01:06, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2681/3000 [09:12<01:00, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2682/3000 [09:13<01:16, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2684/3000 [09:13<01:13, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2687/3000 [09:14<00:51, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2690/3000 [09:14<00:42, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2692/3000 [09:14<00:35, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2694/3000 [09:15<00:51, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [09:15<00:38, 7.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2698/3000 [09:16<01:20, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2699/3000 [09:16<01:27, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2703/3000 [09:17<00:55, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2705/3000 [09:17<00:49, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2706/3000 [09:17<00:54, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2709/3000 [09:18<01:01, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2713/3000 [09:18<00:34, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2715/3000 [09:19<01:02, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2717/3000 [09:20<01:12, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2719/3000 [09:20<01:04, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2722/3000 [09:21<00:51, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2723/3000 [09:21<00:59, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2725/3000 [09:21<00:58, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2730/3000 [09:22<00:39, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2731/3000 [09:22<00:38, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2732/3000 [09:22<00:57, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2735/3000 [09:23<00:49, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2737/3000 [09:23<00:51, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2741/3000 [09:24<00:37, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2742/3000 [09:24<00:37, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2743/3000 [09:24<00:40, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [09:25<01:03, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2746/3000 [09:26<01:35, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2748/3000 [09:26<01:11, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2752/3000 [09:27<00:41, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2753/3000 [09:27<00:37, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2754/3000 [09:27<00:46, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2758/3000 [09:27<00:29, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2760/3000 [09:28<00:28, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2762/3000 [09:28<00:38, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2763/3000 [09:29<00:57, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2764/3000 [09:29<01:15, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2765/3000 [09:30<01:09, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2768/3000 [09:30<01:08, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2769/3000 [09:31<01:10, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▎| 2775/3000 [09:31<00:29, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2777/3000 [09:32<00:29, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2779/3000 [09:32<00:31, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [09:32<00:29, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2783/3000 [09:33<01:04, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [09:34<01:02, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2786/3000 [09:34<00:53, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2788/3000 [09:34<00:46, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2791/3000 [09:35<00:33, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2793/3000 [09:35<00:28, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2795/3000 [09:36<00:34, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2798/3000 [09:36<00:36, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2799/3000 [09:36<00:34, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2801/3000 [09:37<00:35, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2805/3000 [09:37<00:31, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2806/3000 [09:38<00:32, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2809/3000 [09:38<00:29, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2811/3000 [09:39<00:40, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2813/3000 [09:39<00:40, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2815/3000 [09:40<00:49, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2817/3000 [09:40<00:46, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2819/3000 [09:40<00:36, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [09:41<00:31, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2822/3000 [09:41<00:37, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2824/3000 [09:41<00:32, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2826/3000 [09:42<00:27, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2828/3000 [09:42<00:26, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2829/3000 [09:42<00:40, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2830/3000 [09:43<00:56, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2832/3000 [09:44<00:51, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2833/3000 [09:44<00:47, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2836/3000 [09:44<00:29, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2839/3000 [09:45<00:25, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2841/3000 [09:45<00:20, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2843/3000 [09:45<00:27, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2847/3000 [09:46<00:27, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2848/3000 [09:46<00:28, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2850/3000 [09:47<00:28, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2851/3000 [09:47<00:32, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2852/3000 [09:48<01:00, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2856/3000 [09:49<00:32, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2858/3000 [09:49<00:38, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2862/3000 [09:50<00:22, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2864/3000 [09:50<00:22, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2867/3000 [09:50<00:20, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2868/3000 [09:51<00:19, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2869/3000 [09:51<00:36, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2872/3000 [09:52<00:36, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2876/3000 [09:53<00:20, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2878/3000 [09:53<00:19, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2879/3000 [09:53<00:20, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2882/3000 [09:54<00:21, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2884/3000 [09:55<00:29, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2886/3000 [09:55<00:28, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2888/3000 [09:55<00:22, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2889/3000 [09:56<00:28, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2891/3000 [09:56<00:25, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2894/3000 [09:57<00:18, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2896/3000 [09:57<00:13, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2898/3000 [09:57<00:10, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2901/3000 [09:57<00:13, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2902/3000 [09:58<00:14, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2904/3000 [09:58<00:22, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2905/3000 [09:59<00:24, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2907/3000 [09:59<00:20, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2909/3000 [10:00<00:22, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2910/3000 [10:00<00:24, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2911/3000 [10:00<00:25, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2916/3000 [10:01<00:12, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2919/3000 [10:01<00:11, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2920/3000 [10:02<00:13, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2922/3000 [10:02<00:12, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2925/3000 [10:02<00:13, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2926/3000 [10:03<00:14, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2928/3000 [10:03<00:17, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2930/3000 [10:04<00:14, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2933/3000 [10:04<00:12, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2934/3000 [10:04<00:12, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2935/3000 [10:05<00:13, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2936/3000 [10:05<00:13, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2938/3000 [10:05<00:10, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2943/3000 [10:06<00:06, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2950/3000 [10:07<00:09, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [10:08<00:09, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [10:08<00:10, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2955/3000 [10:08<00:09, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2956/3000 [10:09<00:09, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2959/3000 [10:09<00:06, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2964/3000 [10:09<00:03, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2966/3000 [10:10<00:05, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2967/3000 [10:11<00:08, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2970/3000 [10:11<00:07, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2971/3000 [10:12<00:06, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2974/3000 [10:12<00:04, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2977/3000 [10:12<00:03, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2980/3000 [10:13<00:02, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2984/3000 [10:13<00:02, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2986/3000 [10:14<00:02, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2988/3000 [10:14<00:02, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2989/3000 [10:15<00:02, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [10:15<00:01, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2992/3000 [10:15<00:02, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2996/3000 [10:16<00:00, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2999/3000 [10:16<00:00, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [10:17<00:00, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:14:53.834\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.001, 'em': 0.001, 'acc': 0.52}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"with suppress_logger_info():  # presumably mutes INFO-level log output while evaluating - confirm\n",
"    results = textgrad_optimizer.evaluate(eval_mode=\"test\", dataset=benchmark)\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fd546a1e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.001, 'em': 0.001, 'acc': 0.52}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "78d5904e",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Shrink the benchmark's training pool to a fixed-size, seeded random subset and\n",
"# reuse that subset as the dev split.\n",
"import numpy as np\n",
"\n",
"SUBSET_SEED = 2024  # seed for the global legacy NumPy RNG, so the subset is reproducible\n",
"SUBSET_SIZE = 150   # number of training examples to keep\n",
"\n",
"train_pool = list(benchmark._train_data)\n",
"np.random.seed(SUBSET_SEED)\n",
"# Sample positions instead of the records themselves: np.random.choice applied to the\n",
"# records returns an object-dtype ndarray, which would replace the benchmark's splits\n",
"# with arrays (ambiguous truth value, not JSON-serialisable). Indexing back into the\n",
"# pool keeps every split a plain Python list.\n",
"# Raises ValueError if the pool holds fewer than SUBSET_SIZE examples (replace=False).\n",
"subset_idx = np.random.choice(len(train_pool), size=SUBSET_SIZE, replace=False)\n",
"out = [train_pool[i] for i in subset_idx]\n",
"benchmark._train_data = out\n",
"# NOTE(review): dev reuses exactly the training examples, so dev scores are not held out -\n",
"# confirm this is intended. A separate list is stored so the two splits do not alias.\n",
"benchmark._dev_data = list(out)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d686ee20",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:24:02.443\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/3000 [00:02<2:12:06, 2.64s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 4/3000 [00:03<25:39, 1.95it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 6/3000 [00:03<15:00, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 9/3000 [00:03<11:09, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 12/3000 [00:03<08:20, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 16/3000 [00:04<05:52, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 18/3000 [00:04<06:56, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 20/3000 [00:04<06:44, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 21/3000 [00:05<14:24, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 22/3000 [00:06<14:23, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 24/3000 [00:06<14:56, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 27/3000 [00:07<10:43, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 30/3000 [00:07<07:19, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 32/3000 [00:07<06:28, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 35/3000 [00:07<05:13, 9.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 37/3000 [00:09<12:36, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 38/3000 [00:09<12:39, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 40/3000 [00:09<10:43, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 41/3000 [00:10<11:39, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 42/3000 [00:10<13:19, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 45/3000 [00:10<09:26, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 47/3000 [00:11<08:48, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 50/3000 [00:11<08:18, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 52/3000 [00:11<07:39, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 53/3000 [00:12<11:10, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 56/3000 [00:12<08:14, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 58/3000 [00:13<10:59, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 62/3000 [00:13<07:59, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 63/3000 [00:13<08:31, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 65/3000 [00:14<10:21, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 68/3000 [00:14<06:57, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 69/3000 [00:15<08:23, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 72/3000 [00:15<06:19, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 73/3000 [00:16<12:10, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 76/3000 [00:16<08:46, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 78/3000 [00:16<08:18, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 80/3000 [00:17<10:05, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 81/3000 [00:17<09:41, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 84/3000 [00:18<08:19, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 85/3000 [00:18<12:43, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 87/3000 [00:18<10:50, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 88/3000 [00:19<11:36, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 90/3000 [00:19<09:34, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 91/3000 [00:19<09:38, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 94/3000 [00:20<11:51, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 95/3000 [00:20<12:27, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 96/3000 [00:21<12:05, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 97/3000 [00:21<12:51, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 99/3000 [00:21<10:53, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 102/3000 [00:22<10:28, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 103/3000 [00:22<11:11, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 104/3000 [00:23<13:14, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 106/3000 [00:23<10:11, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:23<11:15, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:24<08:42, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 113/3000 [00:24<08:52, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 115/3000 [00:25<08:28, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 120/3000 [00:25<08:05, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:26<10:19, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 123/3000 [00:26<10:23, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:27<09:20, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 127/3000 [00:27<08:16, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 129/3000 [00:27<06:29, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 132/3000 [00:28<07:26, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:28<08:04, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 135/3000 [00:28<08:21, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 136/3000 [00:29<07:57, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 138/3000 [00:29<07:29, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 140/3000 [00:29<06:08, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 143/3000 [00:30<13:16, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 145/3000 [00:31<14:50, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 147/3000 [00:31<09:46, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 151/3000 [00:32<06:14, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:32<06:25, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 155/3000 [00:32<07:30, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 157/3000 [00:33<06:53, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 159/3000 [00:33<08:18, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:34<09:58, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 163/3000 [00:35<13:44, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 166/3000 [00:36<13:32, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 167/3000 [00:36<11:45, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 170/3000 [00:36<08:03, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 172/3000 [00:36<09:11, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 174/3000 [00:37<08:42, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 175/3000 [00:37<08:58, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 176/3000 [00:37<10:19, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 179/3000 [00:38<10:17, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 181/3000 [00:39<11:12, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 184/3000 [00:39<08:56, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 185/3000 [00:39<09:01, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 188/3000 [00:40<10:29, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 190/3000 [00:40<10:16, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 192/3000 [00:41<09:10, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 194/3000 [00:41<07:09, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 197/3000 [00:42<08:07, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 199/3000 [00:42<07:52, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 200/3000 [00:42<09:11, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 202/3000 [00:43<09:33, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 205/3000 [00:43<06:51, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 206/3000 [00:43<06:37, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 207/3000 [00:44<11:12, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [00:44<11:06, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 210/3000 [00:44<10:03, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 211/3000 [00:45<11:55, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 218/3000 [00:46<06:21, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 221/3000 [00:47<10:38, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 223/3000 [00:47<09:40, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 226/3000 [00:47<08:18, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 228/3000 [00:48<10:06, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 229/3000 [00:48<10:52, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 230/3000 [00:49<11:27, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 231/3000 [00:49<13:30, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 236/3000 [00:50<07:41, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 238/3000 [00:50<08:23, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 239/3000 [00:50<08:26, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 241/3000 [00:51<09:22, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 243/3000 [00:51<08:07, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 246/3000 [00:51<07:03, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 247/3000 [00:52<07:21, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 251/3000 [00:53<11:16, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 252/3000 [00:53<13:45, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 254/3000 [00:54<11:29, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 257/3000 [00:54<07:04, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 258/3000 [00:54<07:14, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 261/3000 [00:55<07:59, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 264/3000 [00:55<05:42, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 265/3000 [00:56<18:45, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 268/3000 [00:57<11:57, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 269/3000 [00:57<12:39, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 272/3000 [00:57<08:19, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 274/3000 [00:58<08:22, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 277/3000 [00:58<06:00, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 279/3000 [00:58<05:33, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 281/3000 [00:59<06:21, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 282/3000 [00:59<09:37, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 284/3000 [00:59<08:35, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 285/3000 [01:00<12:48, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 288/3000 [01:01<09:09, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 289/3000 [01:01<12:03, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 291/3000 [01:02<13:40, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 293/3000 [01:02<09:50, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 294/3000 [01:02<08:37, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 298/3000 [01:03<07:22, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 302/3000 [01:03<04:26, 10.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 304/3000 [01:04<09:56, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [01:04<09:08, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 307/3000 [01:05<10:31, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 308/3000 [01:05<11:00, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [01:05<11:26, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 311/3000 [01:06<13:56, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 315/3000 [01:07<08:50, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 318/3000 [01:07<08:13, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [01:07<07:00, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 323/3000 [01:08<05:43, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 325/3000 [01:08<07:04, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 326/3000 [01:08<07:55, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 327/3000 [01:09<08:58, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 328/3000 [01:09<09:43, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 329/3000 [01:09<10:38, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 330/3000 [01:10<11:52, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 331/3000 [01:10<12:45, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 332/3000 [01:10<12:35, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 334/3000 [01:10<09:18, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 335/3000 [01:11<11:44, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 340/3000 [01:11<06:33, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 343/3000 [01:12<06:57, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 345/3000 [01:12<07:27, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 346/3000 [01:13<08:46, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 347/3000 [01:13<11:44, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 349/3000 [01:13<09:24, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 352/3000 [01:14<08:29, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 353/3000 [01:14<08:34, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 356/3000 [01:15<09:32, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 359/3000 [01:15<06:07, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 361/3000 [01:15<05:38, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 363/3000 [01:16<10:00, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 365/3000 [01:17<10:01, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 369/3000 [01:18<08:27, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 373/3000 [01:18<07:05, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▎ | 375/3000 [01:19<07:56, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 379/3000 [01:19<06:00, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 381/3000 [01:20<07:10, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [01:20<08:38, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 385/3000 [01:21<08:19, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 386/3000 [01:21<10:51, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 390/3000 [01:22<09:33, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 393/3000 [01:22<08:20, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 396/3000 [01:23<06:21, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 397/3000 [01:23<05:59, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 401/3000 [01:24<06:37, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 403/3000 [01:24<06:13, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 405/3000 [01:25<10:46, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:25<08:22, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 410/3000 [01:26<12:08, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 411/3000 [01:26<14:15, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 413/3000 [01:27<11:29, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 416/3000 [01:27<08:37, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 418/3000 [01:28<08:59, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 420/3000 [01:28<07:08, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 421/3000 [01:28<07:19, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 425/3000 [01:28<05:36, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 427/3000 [01:29<09:56, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 429/3000 [01:30<08:10, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 430/3000 [01:30<11:44, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 434/3000 [01:31<08:55, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 435/3000 [01:31<08:10, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 438/3000 [01:31<07:04, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 441/3000 [01:32<06:23, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 445/3000 [01:32<04:55, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 447/3000 [01:33<07:32, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 450/3000 [01:34<11:47, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 452/3000 [01:35<12:02, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 453/3000 [01:35<11:03, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 454/3000 [01:35<11:29, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 456/3000 [01:36<10:28, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 457/3000 [01:36<12:19, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 459/3000 [01:36<10:13, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 464/3000 [01:37<05:12, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 466/3000 [01:37<06:26, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 468/3000 [01:37<06:05, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 470/3000 [01:38<10:43, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 471/3000 [01:39<16:15, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 472/3000 [01:40<16:17, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 473/3000 [01:40<16:17, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 475/3000 [01:41<15:00, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 477/3000 [01:41<11:41, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 482/3000 [01:42<06:56, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 483/3000 [01:42<07:07, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [01:42<07:56, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 486/3000 [01:43<09:14, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 487/3000 [01:43<09:45, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 488/3000 [01:43<11:22, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 489/3000 [01:44<13:21, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 492/3000 [01:44<09:13, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 493/3000 [01:45<13:29, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 494/3000 [01:45<13:33, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 496/3000 [01:45<10:56, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 498/3000 [01:46<08:03, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 499/3000 [01:46<09:25, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 500/3000 [01:46<10:22, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 501/3000 [01:47<12:56, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 505/3000 [01:47<07:24, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 506/3000 [01:48<08:04, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 508/3000 [01:48<07:09, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 510/3000 [01:48<06:20, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [01:49<09:05, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 514/3000 [01:49<08:08, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 515/3000 [01:50<13:30, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 518/3000 [01:51<11:00, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 521/3000 [01:51<06:41, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 525/3000 [01:51<04:50, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 527/3000 [01:52<08:06, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 530/3000 [01:52<07:05, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 531/3000 [01:53<11:09, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 533/3000 [01:54<11:41, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 535/3000 [01:54<10:19, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 539/3000 [01:55<06:51, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 540/3000 [01:55<06:51, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 544/3000 [01:55<04:53, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 546/3000 [01:55<04:46, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [01:56<05:46, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 550/3000 [01:57<10:41, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 554/3000 [01:58<09:47, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 556/3000 [01:58<09:47, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 557/3000 [01:59<11:42, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 558/3000 [01:59<13:57, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 559/3000 [02:00<15:16, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [02:00<10:23, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 563/3000 [02:00<08:41, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 564/3000 [02:00<08:59, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 568/3000 [02:02<10:55, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 570/3000 [02:02<09:06, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 572/3000 [02:03<10:44, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 573/3000 [02:03<11:24, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 576/3000 [02:04<10:00, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 578/3000 [02:04<07:27, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [02:04<07:05, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 585/3000 [02:05<06:12, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 587/3000 [02:06<07:40, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 589/3000 [02:07<10:48, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 591/3000 [02:07<08:51, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 592/3000 [02:07<09:32, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 593/3000 [02:08<11:17, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 597/3000 [02:08<07:15, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 599/3000 [02:08<05:28, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 601/3000 [02:09<08:09, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 603/3000 [02:09<09:12, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 604/3000 [02:10<08:42, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [02:10<11:58, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 607/3000 [02:11<10:20, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 610/3000 [02:11<05:32, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 612/3000 [02:11<05:14, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 614/3000 [02:12<08:30, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 615/3000 [02:12<09:30, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 618/3000 [02:12<06:59, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 620/3000 [02:13<05:58, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 621/3000 [02:13<07:19, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 622/3000 [02:13<08:44, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 623/3000 [02:14<10:50, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 625/3000 [02:14<08:26, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [02:14<08:40, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 630/3000 [02:15<06:18, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 634/3000 [02:15<04:42, 8.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 636/3000 [02:15<04:51, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 638/3000 [02:16<09:22, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 640/3000 [02:17<07:29, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 642/3000 [02:17<07:33, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 643/3000 [02:17<09:05, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 645/3000 [02:18<08:54, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [02:18<09:32, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 648/3000 [02:19<10:06, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 650/3000 [02:19<09:00, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 652/3000 [02:19<07:49, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 654/3000 [02:20<07:35, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [02:20<07:27, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 657/3000 [02:20<07:18, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 658/3000 [02:21<08:08, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 660/3000 [02:21<09:49, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 662/3000 [02:22<10:13, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 666/3000 [02:22<06:33, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 667/3000 [02:22<06:18, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 669/3000 [02:23<07:01, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 671/3000 [02:23<05:16, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 674/3000 [02:24<05:32, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 677/3000 [02:24<06:36, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 680/3000 [02:24<04:45, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 681/3000 [02:25<07:28, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 682/3000 [02:25<07:59, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [02:25<08:06, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 684/3000 [02:26<10:42, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [02:26<09:27, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 692/3000 [02:27<04:59, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 693/3000 [02:27<05:58, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 696/3000 [02:27<05:04, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 698/3000 [02:28<08:38, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 700/3000 [02:28<07:15, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 702/3000 [02:29<06:22, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 703/3000 [02:29<09:16, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [02:30<09:52, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 707/3000 [02:30<08:06, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 709/3000 [02:31<08:17, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 712/3000 [02:31<06:27, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 717/3000 [02:32<06:06, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 719/3000 [02:32<06:00, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 720/3000 [02:32<06:42, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 721/3000 [02:33<08:27, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 725/3000 [02:33<05:58, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 728/3000 [02:34<05:59, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 730/3000 [02:34<07:09, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 731/3000 [02:35<08:43, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [02:35<12:04, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 736/3000 [02:36<06:58, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 738/3000 [02:36<05:41, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 742/3000 [02:36<04:16, 8.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 744/3000 [02:37<07:40, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 746/3000 [02:37<08:10, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 748/3000 [02:38<09:06, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 749/3000 [02:38<09:05, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 750/3000 [02:39<10:37, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 752/3000 [02:39<09:37, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 754/3000 [02:39<07:55, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 755/3000 [02:40<08:53, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 757/3000 [02:40<09:24, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 759/3000 [02:41<08:02, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 762/3000 [02:41<06:38, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 766/3000 [02:41<05:54, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 767/3000 [02:42<05:31, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 769/3000 [02:42<09:09, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 772/3000 [02:43<10:02, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 775/3000 [02:44<07:16, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 776/3000 [02:44<07:10, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 777/3000 [02:44<07:32, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 780/3000 [02:45<07:33, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 782/3000 [02:45<05:50, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 785/3000 [02:46<06:04, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 786/3000 [02:46<05:58, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 789/3000 [02:46<05:28, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 791/3000 [02:47<06:52, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [02:47<07:35, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 794/3000 [02:47<06:52, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 795/3000 [02:48<07:32, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 796/3000 [02:48<10:08, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 799/3000 [02:48<07:01, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [02:49<06:14, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [02:49<06:34, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 804/3000 [02:49<06:55, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 806/3000 [02:50<08:20, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 807/3000 [02:50<08:45, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 810/3000 [02:51<08:06, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 811/3000 [02:51<08:47, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 814/3000 [02:52<06:39, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 818/3000 [02:52<05:29, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 819/3000 [02:53<07:34, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 820/3000 [02:53<07:55, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 821/3000 [02:53<08:39, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 822/3000 [02:54<09:32, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 825/3000 [02:54<07:58, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 826/3000 [02:55<07:52, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 828/3000 [02:55<08:37, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 830/3000 [02:55<06:06, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 832/3000 [02:55<05:30, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 835/3000 [02:56<04:20, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 836/3000 [02:56<05:08, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 838/3000 [02:56<05:22, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 839/3000 [02:57<07:38, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 841/3000 [02:57<07:08, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 843/3000 [02:58<07:29, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 845/3000 [02:58<06:16, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 846/3000 [02:58<09:30, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 849/3000 [02:59<06:53, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 853/3000 [02:59<04:24, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 855/3000 [03:00<05:21, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 858/3000 [03:00<06:24, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 859/3000 [03:00<05:45, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 861/3000 [03:01<05:10, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 864/3000 [03:02<07:50, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 866/3000 [03:02<06:03, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 867/3000 [03:02<07:06, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 869/3000 [03:02<06:14, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 871/3000 [03:03<06:57, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 874/3000 [03:04<06:24, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 875/3000 [03:04<07:18, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 877/3000 [03:04<06:48, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 879/3000 [03:05<07:31, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 882/3000 [03:05<05:30, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 883/3000 [03:06<09:13, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 884/3000 [03:06<09:11, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 885/3000 [03:06<08:49, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 887/3000 [03:07<08:52, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 889/3000 [03:07<07:27, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 890/3000 [03:07<07:26, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 892/3000 [03:07<05:53, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 895/3000 [03:08<05:40, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 898/3000 [03:08<05:06, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 902/3000 [03:09<04:55, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 903/3000 [03:10<09:28, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 904/3000 [03:10<10:11, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 906/3000 [03:10<08:23, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 910/3000 [03:11<06:05, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 913/3000 [03:11<05:31, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 914/3000 [03:12<06:28, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 915/3000 [03:12<07:43, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 916/3000 [03:12<08:36, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 918/3000 [03:12<06:33, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 921/3000 [03:13<04:53, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 923/3000 [03:14<08:08, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 926/3000 [03:14<06:26, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 927/3000 [03:14<06:17, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 931/3000 [03:15<04:28, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 933/3000 [03:15<07:02, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 934/3000 [03:16<08:52, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 935/3000 [03:16<08:42, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 939/3000 [03:17<04:49, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 941/3000 [03:17<06:10, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 943/3000 [03:17<05:25, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 944/3000 [03:17<05:51, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 947/3000 [03:18<04:49, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 949/3000 [03:18<04:01, 8.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 950/3000 [03:18<06:57, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 951/3000 [03:19<07:14, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 953/3000 [03:20<09:12, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 954/3000 [03:20<10:05, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 957/3000 [03:20<07:08, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 958/3000 [03:20<06:37, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 960/3000 [03:21<06:59, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 964/3000 [03:21<04:53, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 966/3000 [03:22<05:47, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 968/3000 [03:22<05:18, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [03:23<06:42, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 971/3000 [03:23<07:42, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 972/3000 [03:23<08:08, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▎ | 975/3000 [03:24<06:06, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 977/3000 [03:24<05:35, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 979/3000 [03:24<05:47, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 982/3000 [03:24<04:08, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 983/3000 [03:25<04:05, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 985/3000 [03:25<04:11, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 986/3000 [03:25<06:19, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 989/3000 [03:26<04:53, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 990/3000 [03:26<10:37, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 993/3000 [03:27<06:50, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 994/3000 [03:27<07:24, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 998/3000 [03:28<05:01, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1001/3000 [03:28<05:57, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1002/3000 [03:29<06:37, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1004/3000 [03:29<06:24, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1009/3000 [03:29<03:29, 9.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1011/3000 [03:30<05:39, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1012/3000 [03:31<08:08, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1015/3000 [03:31<07:27, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1022/3000 [03:32<05:05, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1024/3000 [03:32<04:30, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1026/3000 [03:33<05:20, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1028/3000 [03:33<05:03, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1030/3000 [03:33<04:58, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1033/3000 [03:34<05:39, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1034/3000 [03:34<05:45, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1037/3000 [03:35<06:59, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1039/3000 [03:36<06:33, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1042/3000 [03:36<06:27, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1045/3000 [03:36<04:40, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1046/3000 [03:37<04:43, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1050/3000 [03:37<04:45, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1052/3000 [03:38<07:29, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1054/3000 [03:38<05:42, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1057/3000 [03:39<06:55, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1059/3000 [03:39<05:59, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1061/3000 [03:40<06:00, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1064/3000 [03:41<06:39, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1068/3000 [03:41<03:52, 8.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1072/3000 [03:42<04:35, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1074/3000 [03:42<06:44, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1077/3000 [03:43<05:11, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1081/3000 [03:43<05:20, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1084/3000 [03:44<07:38, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1090/3000 [03:45<04:51, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1092/3000 [03:45<04:52, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1095/3000 [03:46<04:45, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1096/3000 [03:46<05:16, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1097/3000 [03:47<07:05, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1099/3000 [03:47<06:20, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [03:47<06:36, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1102/3000 [03:48<07:25, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1103/3000 [03:48<06:29, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1105/3000 [03:49<07:41, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1107/3000 [03:49<05:44, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1111/3000 [03:50<07:08, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1114/3000 [03:50<05:14, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1116/3000 [03:50<05:13, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1117/3000 [03:51<05:27, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1119/3000 [03:51<08:09, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1122/3000 [03:52<06:26, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1127/3000 [03:53<04:14, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1130/3000 [03:53<04:13, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1133/3000 [03:53<03:38, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1134/3000 [03:53<04:12, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1136/3000 [03:54<08:24, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1137/3000 [03:55<08:57, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1140/3000 [03:56<07:25, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1141/3000 [03:56<06:28, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1143/3000 [03:56<05:24, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1146/3000 [03:56<04:19, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1148/3000 [03:57<04:06, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1150/3000 [03:57<04:04, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1151/3000 [03:57<07:14, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1152/3000 [03:58<08:40, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1154/3000 [03:58<08:40, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1156/3000 [03:59<07:45, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1161/3000 [04:00<06:02, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1164/3000 [04:00<05:29, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1167/3000 [04:00<04:46, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [04:01<05:48, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1170/3000 [04:01<06:50, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1172/3000 [04:02<07:31, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1175/3000 [04:02<04:57, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1178/3000 [04:03<04:27, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1181/3000 [04:04<06:12, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1183/3000 [04:04<05:42, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1185/3000 [04:05<06:02, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1187/3000 [04:05<04:23, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1189/3000 [04:05<06:03, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1190/3000 [04:06<07:05, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1192/3000 [04:06<07:13, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1194/3000 [04:07<06:22, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1195/3000 [04:07<08:24, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1196/3000 [04:07<08:56, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1197/3000 [04:08<10:03, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1199/3000 [04:08<07:22, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1202/3000 [04:09<05:49, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [04:09<04:01, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1208/3000 [04:09<04:09, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1209/3000 [04:09<05:05, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1212/3000 [04:10<04:50, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1214/3000 [04:11<06:33, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1216/3000 [04:11<05:04, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1218/3000 [04:11<05:26, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1220/3000 [04:13<10:38, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [04:13<06:58, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1225/3000 [04:13<05:50, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1228/3000 [04:14<03:59, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1229/3000 [04:14<04:13, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1232/3000 [04:14<03:33, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1234/3000 [04:15<05:09, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1236/3000 [04:15<05:32, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1238/3000 [04:15<05:20, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1239/3000 [04:16<07:50, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1240/3000 [04:16<07:23, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1242/3000 [04:17<06:41, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1243/3000 [04:17<06:25, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1245/3000 [04:17<05:19, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1246/3000 [04:18<08:31, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1248/3000 [04:18<08:09, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1251/3000 [04:19<08:12, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1257/3000 [04:19<03:30, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1259/3000 [04:20<04:18, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1261/3000 [04:20<04:51, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1262/3000 [04:21<05:17, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1264/3000 [04:21<05:30, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1266/3000 [04:22<09:39, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1268/3000 [04:23<07:08, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1272/3000 [04:23<03:50, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1274/3000 [04:23<03:48, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1276/3000 [04:23<03:43, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1279/3000 [04:24<03:39, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1281/3000 [04:24<05:31, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1283/3000 [04:25<06:00, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1285/3000 [04:26<06:44, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1288/3000 [04:26<06:45, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1289/3000 [04:27<06:37, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1292/3000 [04:27<05:38, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1295/3000 [04:27<05:07, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1298/3000 [04:28<03:51, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1299/3000 [04:28<03:46, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1302/3000 [04:28<03:43, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1303/3000 [04:29<06:16, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1304/3000 [04:29<06:27, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1306/3000 [04:29<05:54, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1309/3000 [04:30<05:17, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1311/3000 [04:30<04:56, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1314/3000 [04:31<04:13, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1316/3000 [04:31<04:38, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1317/3000 [04:32<07:25, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1320/3000 [04:32<05:31, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1322/3000 [04:33<05:08, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1323/3000 [04:33<04:45, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1324/3000 [04:33<08:07, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1327/3000 [04:34<05:24, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1329/3000 [04:34<05:10, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1331/3000 [04:34<04:44, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1332/3000 [04:35<06:27, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1334/3000 [04:35<06:17, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1338/3000 [04:36<05:28, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1340/3000 [04:36<04:55, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1342/3000 [04:37<05:14, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1344/3000 [04:37<05:49, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1347/3000 [04:38<05:31, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1350/3000 [04:38<05:54, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1353/3000 [04:39<05:28, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1355/3000 [04:39<04:08, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1358/3000 [04:40<04:20, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1360/3000 [04:40<04:28, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [04:40<04:06, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1363/3000 [04:40<04:22, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1364/3000 [04:41<07:21, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1365/3000 [04:41<07:37, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1367/3000 [04:42<07:19, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1368/3000 [04:42<09:06, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1370/3000 [04:43<06:17, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1374/3000 [04:43<04:30, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1376/3000 [04:44<04:31, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1379/3000 [04:44<04:18, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1381/3000 [04:44<04:13, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1383/3000 [04:45<04:43, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1385/3000 [04:45<05:56, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1387/3000 [04:46<06:16, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1390/3000 [04:46<04:59, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1392/3000 [04:47<05:15, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1393/3000 [04:47<06:43, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1395/3000 [04:48<05:50, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1397/3000 [04:48<05:53, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1399/3000 [04:48<04:33, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1401/3000 [04:48<03:58, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1404/3000 [04:49<05:30, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1405/3000 [04:50<06:08, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1408/3000 [04:50<05:11, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1410/3000 [04:51<06:26, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1412/3000 [04:51<04:54, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1413/3000 [04:51<05:50, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1414/3000 [04:52<07:09, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1415/3000 [04:52<07:36, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1417/3000 [04:52<05:33, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1418/3000 [04:52<05:29, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1419/3000 [04:53<05:32, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1421/3000 [04:53<06:44, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1424/3000 [04:54<07:53, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1426/3000 [04:55<06:20, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1430/3000 [04:55<04:32, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1432/3000 [04:55<03:39, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1434/3000 [04:56<04:36, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1435/3000 [04:56<04:51, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1436/3000 [04:56<05:15, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1437/3000 [04:57<05:56, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1438/3000 [04:57<06:49, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1440/3000 [04:58<06:59, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1441/3000 [04:58<08:17, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1443/3000 [04:58<06:04, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1446/3000 [04:59<05:40, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1447/3000 [04:59<05:21, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1448/3000 [05:00<06:28, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1451/3000 [05:00<04:08, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1453/3000 [05:00<04:42, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1454/3000 [05:01<05:34, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1456/3000 [05:01<06:27, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [05:02<07:10, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1460/3000 [05:02<05:02, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1464/3000 [05:03<03:48, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [05:03<03:35, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1468/3000 [05:03<03:16, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1470/3000 [05:03<03:19, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1471/3000 [05:04<04:22, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1473/3000 [05:04<04:49, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1475/3000 [05:05<05:28, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1478/3000 [05:06<08:06, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1480/3000 [05:06<06:13, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1484/3000 [05:07<04:32, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [05:07<03:21, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1491/3000 [05:08<04:32, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1493/3000 [05:08<03:58, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1495/3000 [05:09<05:56, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1497/3000 [05:09<05:14, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1499/3000 [05:10<05:15, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1502/3000 [05:11<05:17, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1508/3000 [05:11<03:02, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1511/3000 [05:12<03:53, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1512/3000 [05:12<03:39, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1514/3000 [05:13<07:38, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1516/3000 [05:13<06:53, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1517/3000 [05:14<06:33, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1518/3000 [05:14<06:25, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1520/3000 [05:14<05:14, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1525/3000 [05:15<02:58, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1527/3000 [05:15<03:56, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1529/3000 [05:16<05:13, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1530/3000 [05:16<06:47, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1534/3000 [05:17<04:59, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1537/3000 [05:18<04:27, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1539/3000 [05:18<03:58, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1540/3000 [05:19<06:48, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1543/3000 [05:19<05:26, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1545/3000 [05:19<04:07, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1546/3000 [05:20<04:56, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1548/3000 [05:20<04:25, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1550/3000 [05:20<04:17, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [05:21<04:27, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1553/3000 [05:21<03:50, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1556/3000 [05:22<04:32, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1557/3000 [05:22<04:04, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1559/3000 [05:22<04:48, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1560/3000 [05:22<05:38, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1562/3000 [05:24<08:20, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1567/3000 [05:24<04:19, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1570/3000 [05:24<03:18, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1572/3000 [05:25<03:32, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1574/3000 [05:25<03:09, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1577/3000 [05:25<03:57, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1578/3000 [05:26<05:15, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1580/3000 [05:27<06:12, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1582/3000 [05:27<05:51, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1584/3000 [05:27<05:36, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1585/3000 [05:28<05:15, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1587/3000 [05:28<05:06, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1590/3000 [05:28<03:38, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1591/3000 [05:29<04:14, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1594/3000 [05:29<04:07, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1597/3000 [05:29<03:10, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1598/3000 [05:30<04:27, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1600/3000 [05:30<04:13, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [05:30<04:51, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1604/3000 [05:32<06:18, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1608/3000 [05:32<04:10, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1609/3000 [05:32<04:01, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1610/3000 [05:33<05:27, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1615/3000 [05:33<03:05, 7.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1617/3000 [05:34<03:58, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [05:34<04:41, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1620/3000 [05:35<05:29, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1621/3000 [05:35<06:07, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1624/3000 [05:36<05:22, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1626/3000 [05:36<04:01, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1629/3000 [05:36<03:27, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1631/3000 [05:36<03:16, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1633/3000 [05:37<03:25, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1635/3000 [05:37<03:10, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1637/3000 [05:37<03:21, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1639/3000 [05:38<05:31, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1640/3000 [05:39<08:15, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1641/3000 [05:39<08:05, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1644/3000 [05:39<04:38, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1646/3000 [05:40<03:52, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1648/3000 [05:40<04:08, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1652/3000 [05:40<02:41, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1654/3000 [05:41<04:53, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1656/3000 [05:42<05:22, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1659/3000 [05:42<03:34, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1661/3000 [05:43<05:01, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1664/3000 [05:43<04:10, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1665/3000 [05:44<04:12, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1668/3000 [05:44<03:16, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1669/3000 [05:44<04:38, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1671/3000 [05:45<04:36, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1673/3000 [05:45<05:14, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1677/3000 [05:46<04:03, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1678/3000 [05:46<04:24, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [05:47<05:29, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1681/3000 [05:47<06:24, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1683/3000 [05:48<05:07, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1684/3000 [05:48<05:08, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1686/3000 [05:48<04:00, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1687/3000 [05:49<04:49, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1690/3000 [05:49<03:57, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1691/3000 [05:49<04:05, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1692/3000 [05:50<04:58, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1694/3000 [05:50<04:50, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1695/3000 [05:50<05:07, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1697/3000 [05:51<05:53, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1699/3000 [05:51<04:07, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1703/3000 [05:52<03:02, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [05:52<03:01, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1707/3000 [05:53<04:41, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1709/3000 [05:53<03:43, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1712/3000 [05:53<02:51, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1713/3000 [05:53<03:04, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1715/3000 [05:54<03:37, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1718/3000 [05:54<03:41, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1720/3000 [05:55<04:01, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1721/3000 [05:55<04:05, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1722/3000 [05:56<06:34, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1726/3000 [05:56<03:48, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1728/3000 [05:56<02:49, 7.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1731/3000 [05:57<03:16, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1733/3000 [05:57<04:33, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1734/3000 [05:58<04:02, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1735/3000 [05:58<04:15, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1736/3000 [05:58<05:19, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1738/3000 [05:59<05:09, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1740/3000 [05:59<05:29, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [06:00<08:24, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1746/3000 [06:01<04:06, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1750/3000 [06:01<02:26, 8.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1752/3000 [06:01<02:55, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1754/3000 [06:02<03:43, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1755/3000 [06:03<06:25, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1756/3000 [06:03<06:44, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1757/3000 [06:04<08:17, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1759/3000 [06:04<06:20, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1760/3000 [06:04<06:55, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [06:05<03:27, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1768/3000 [06:05<03:05, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1770/3000 [06:06<03:35, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1772/3000 [06:06<03:31, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1773/3000 [06:06<03:39, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1775/3000 [06:07<04:20, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1777/3000 [06:07<04:49, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1779/3000 [06:08<04:15, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1780/3000 [06:08<05:35, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1781/3000 [06:08<05:25, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1785/3000 [06:09<03:06, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1789/3000 [06:10<03:19, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1790/3000 [06:10<03:30, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1793/3000 [06:10<03:15, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1794/3000 [06:11<04:01, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1796/3000 [06:12<06:23, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1798/3000 [06:12<04:55, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1801/3000 [06:13<04:17, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1805/3000 [06:13<02:07, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1807/3000 [06:13<01:55, 10.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1811/3000 [06:14<03:42, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [06:14<02:58, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1815/3000 [06:15<04:29, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1816/3000 [06:15<04:43, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1818/3000 [06:16<05:21, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1820/3000 [06:17<05:36, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1821/3000 [06:17<05:37, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1823/3000 [06:17<04:50, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1826/3000 [06:18<03:50, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1828/3000 [06:18<04:12, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [06:19<03:25, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1831/3000 [06:20<07:00, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1833/3000 [06:20<05:33, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1835/3000 [06:20<04:15, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1836/3000 [06:21<04:22, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1837/3000 [06:21<04:33, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1841/3000 [06:21<02:52, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1842/3000 [06:22<04:31, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1846/3000 [06:22<02:53, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1848/3000 [06:23<04:09, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1849/3000 [06:23<04:22, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1851/3000 [06:24<04:26, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1852/3000 [06:24<04:19, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1854/3000 [06:24<03:32, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [06:25<03:28, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1857/3000 [06:25<04:18, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1859/3000 [06:25<03:38, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1861/3000 [06:26<04:27, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1863/3000 [06:26<04:20, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1865/3000 [06:27<04:44, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1866/3000 [06:27<04:01, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1868/3000 [06:27<03:43, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1869/3000 [06:28<04:20, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1871/3000 [06:28<05:03, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1873/3000 [06:29<04:22, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1878/3000 [06:30<03:30, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1883/3000 [06:31<04:07, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1886/3000 [06:32<03:22, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1889/3000 [06:32<03:34, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1891/3000 [06:33<04:27, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1893/3000 [06:33<03:56, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1895/3000 [06:34<04:25, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1896/3000 [06:34<04:36, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [06:34<04:23, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1899/3000 [06:35<04:55, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1902/3000 [06:35<04:17, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1903/3000 [06:36<05:36, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1906/3000 [06:37<04:12, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1907/3000 [06:37<04:02, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1910/3000 [06:37<03:34, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1911/3000 [06:37<03:26, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1913/3000 [06:38<04:01, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [06:38<04:50, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1916/3000 [06:39<04:20, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1918/3000 [06:39<03:58, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1919/3000 [06:40<04:13, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1920/3000 [06:40<05:36, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1923/3000 [06:40<03:45, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1924/3000 [06:41<04:13, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1927/3000 [06:41<03:53, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1929/3000 [06:42<03:17, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1930/3000 [06:42<04:28, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1932/3000 [06:43<06:03, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1935/3000 [06:44<03:57, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1937/3000 [06:44<02:59, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1940/3000 [06:44<02:54, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1941/3000 [06:45<03:43, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1943/3000 [06:45<03:50, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1944/3000 [06:46<06:17, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [06:47<06:46, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1948/3000 [06:47<05:10, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1951/3000 [06:48<04:03, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1953/3000 [06:48<03:51, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1954/3000 [06:49<05:22, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1955/3000 [06:49<05:15, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1957/3000 [06:50<05:29, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1959/3000 [06:50<05:26, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1961/3000 [06:51<03:53, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1962/3000 [06:51<03:33, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1964/3000 [06:51<03:07, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1966/3000 [06:52<03:51, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1967/3000 [06:52<03:21, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1969/3000 [06:52<03:30, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1970/3000 [06:53<04:03, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1972/3000 [06:53<03:32, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1973/3000 [06:54<05:31, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1974/3000 [06:54<05:43, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1976/3000 [06:54<04:45, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1977/3000 [06:55<04:39, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1979/3000 [06:55<04:55, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1982/3000 [06:56<03:24, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1985/3000 [06:56<02:44, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1987/3000 [06:57<03:14, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1989/3000 [06:57<03:43, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1991/3000 [06:58<04:01, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1992/3000 [06:58<03:55, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1994/3000 [06:59<04:22, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1997/3000 [06:59<03:28, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1998/3000 [07:00<04:31, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1999/3000 [07:00<04:20, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2000/3000 [07:00<04:26, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2002/3000 [07:01<04:40, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2004/3000 [07:01<03:46, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2007/3000 [07:02<02:48, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2008/3000 [07:02<03:13, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2009/3000 [07:03<05:07, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2011/3000 [07:03<04:05, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2012/3000 [07:03<05:35, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2013/3000 [07:04<05:35, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2014/3000 [07:04<05:16, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2017/3000 [07:05<03:41, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2019/3000 [07:05<03:43, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2020/3000 [07:05<03:59, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2021/3000 [07:06<03:48, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [07:06<05:54, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2026/3000 [07:07<03:14, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2027/3000 [07:07<02:55, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2028/3000 [07:08<05:49, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2032/3000 [07:08<03:23, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2034/3000 [07:08<02:36, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [07:09<02:28, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2038/3000 [07:09<02:31, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [07:09<03:07, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2040/3000 [07:10<05:03, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2041/3000 [07:10<05:32, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2042/3000 [07:11<05:48, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2044/3000 [07:12<06:37, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2047/3000 [07:12<03:46, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2050/3000 [07:13<02:54, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2052/3000 [07:13<02:09, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2056/3000 [07:14<02:41, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2058/3000 [07:14<02:14, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2060/3000 [07:15<03:05, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2062/3000 [07:15<03:19, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2064/3000 [07:16<04:30, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2065/3000 [07:16<04:49, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2067/3000 [07:17<04:42, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2069/3000 [07:17<03:47, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2070/3000 [07:18<04:14, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2071/3000 [07:18<04:46, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2072/3000 [07:19<04:42, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2074/3000 [07:19<03:22, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2075/3000 [07:19<03:21, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2079/3000 [07:20<02:55, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2081/3000 [07:20<03:40, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2083/3000 [07:21<03:06, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2085/3000 [07:21<02:27, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2086/3000 [07:21<03:49, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2089/3000 [07:22<02:50, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2090/3000 [07:23<04:52, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2093/3000 [07:23<04:31, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2095/3000 [07:24<03:36, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2097/3000 [07:24<03:20, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2099/3000 [07:25<04:31, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2101/3000 [07:26<03:45, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2103/3000 [07:26<03:07, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2105/3000 [07:26<03:14, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2106/3000 [07:27<04:00, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2107/3000 [07:27<04:56, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2108/3000 [07:28<05:30, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2110/3000 [07:28<04:17, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2112/3000 [07:28<03:43, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2113/3000 [07:29<04:02, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2114/3000 [07:29<04:35, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2115/3000 [07:30<04:38, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2119/3000 [07:30<03:06, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2121/3000 [07:31<03:40, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2123/3000 [07:31<03:19, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2124/3000 [07:32<03:27, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2125/3000 [07:32<04:55, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2126/3000 [07:33<05:27, 2.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2127/3000 [07:34<07:16, 2.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2128/3000 [07:34<06:16, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2132/3000 [07:34<03:19, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2133/3000 [07:35<04:24, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2135/3000 [07:36<04:30, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2137/3000 [07:36<03:11, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2138/3000 [07:36<03:41, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2141/3000 [07:36<02:22, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2143/3000 [07:37<02:51, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2145/3000 [07:39<05:56, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2148/3000 [07:39<04:49, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2149/3000 [07:40<04:50, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2151/3000 [07:40<03:56, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2155/3000 [07:41<02:35, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2156/3000 [07:41<03:16, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2157/3000 [07:41<03:35, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2158/3000 [07:42<04:02, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2162/3000 [07:42<02:53, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2163/3000 [07:42<02:34, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2164/3000 [07:43<02:49, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2166/3000 [07:43<03:28, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2167/3000 [07:44<04:48, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2168/3000 [07:45<06:03, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2170/3000 [07:45<04:53, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2171/3000 [07:46<06:38, 2.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2172/3000 [07:46<05:52, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2173/3000 [07:47<05:17, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▎ | 2175/3000 [07:47<04:13, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2176/3000 [07:47<03:28, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2177/3000 [07:47<03:42, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2178/3000 [07:48<04:18, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2181/3000 [07:48<02:46, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2183/3000 [07:49<02:49, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2184/3000 [07:49<04:07, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2185/3000 [07:50<04:05, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2188/3000 [07:50<03:02, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2189/3000 [07:50<03:19, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2192/3000 [07:51<02:34, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2193/3000 [07:51<03:27, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2196/3000 [07:52<02:41, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [07:53<03:26, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2200/3000 [07:53<02:43, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [07:53<02:59, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2203/3000 [07:54<03:15, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2204/3000 [07:54<03:05, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2206/3000 [07:54<02:45, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2207/3000 [07:55<04:03, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2208/3000 [07:55<04:39, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2209/3000 [07:56<04:54, 2.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2210/3000 [07:57<07:44, 1.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2213/3000 [07:57<04:03, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2219/3000 [07:58<01:53, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2221/3000 [07:58<01:40, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2222/3000 [07:59<03:26, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2223/3000 [07:59<03:20, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [07:59<03:15, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2225/3000 [08:00<03:51, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2226/3000 [08:00<03:36, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2228/3000 [08:00<03:17, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2229/3000 [08:01<03:28, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2231/3000 [08:01<03:18, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2233/3000 [08:01<02:39, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2236/3000 [08:02<01:50, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2238/3000 [08:02<01:24, 9.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2240/3000 [08:02<01:25, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2242/3000 [08:04<04:21, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2244/3000 [08:04<03:39, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2245/3000 [08:05<04:46, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2246/3000 [08:05<04:34, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2248/3000 [08:05<03:35, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2249/3000 [08:06<03:50, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2252/3000 [08:06<02:24, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2253/3000 [08:06<02:49, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2255/3000 [08:07<03:50, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2257/3000 [08:08<03:35, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2259/3000 [08:08<02:57, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [08:08<02:41, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2261/3000 [08:09<03:18, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2263/3000 [08:09<03:08, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2265/3000 [08:10<03:48, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2268/3000 [08:10<02:12, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2269/3000 [08:11<02:48, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2271/3000 [08:11<02:49, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2274/3000 [08:13<03:40, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2275/3000 [08:13<03:36, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2278/3000 [08:13<02:31, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2280/3000 [08:14<02:48, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2282/3000 [08:14<03:00, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2284/3000 [08:15<02:51, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [08:15<03:26, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2290/3000 [08:16<01:49, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2292/3000 [08:16<02:08, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2294/3000 [08:17<03:21, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2297/3000 [08:18<02:10, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2299/3000 [08:18<01:58, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2300/3000 [08:18<02:31, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2301/3000 [08:19<03:36, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2302/3000 [08:19<03:55, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2305/3000 [08:19<02:17, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2306/3000 [08:20<02:22, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2308/3000 [08:20<02:19, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2310/3000 [08:20<01:56, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2311/3000 [08:21<04:12, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2316/3000 [08:22<01:57, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2318/3000 [08:22<01:59, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2319/3000 [08:22<02:07, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2321/3000 [08:23<02:59, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2323/3000 [08:24<03:06, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2325/3000 [08:24<02:12, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2326/3000 [08:24<02:37, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2328/3000 [08:25<02:11, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2329/3000 [08:25<02:17, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2330/3000 [08:25<02:28, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [08:26<03:31, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2333/3000 [08:26<02:57, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2336/3000 [08:27<02:05, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2337/3000 [08:27<02:32, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2338/3000 [08:28<03:42, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2342/3000 [08:28<01:54, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2344/3000 [08:28<01:39, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2347/3000 [08:29<02:11, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2348/3000 [08:29<02:16, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2351/3000 [08:30<01:34, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2354/3000 [08:30<01:19, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2356/3000 [08:30<01:48, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2357/3000 [08:31<02:09, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2359/3000 [08:32<02:56, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2360/3000 [08:32<03:36, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2361/3000 [08:32<03:17, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2363/3000 [08:33<02:44, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2366/3000 [08:33<02:02, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2368/3000 [08:34<02:10, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2370/3000 [08:34<02:09, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2372/3000 [08:34<01:33, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2374/3000 [08:35<02:44, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2375/3000 [08:35<02:22, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2378/3000 [08:36<01:39, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2380/3000 [08:36<01:23, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [08:36<01:35, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2383/3000 [08:37<02:33, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2385/3000 [08:37<02:28, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2387/3000 [08:38<02:36, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2389/3000 [08:38<02:18, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2390/3000 [08:39<02:20, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2394/3000 [08:39<02:04, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2395/3000 [08:40<03:16, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2399/3000 [08:40<01:49, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2404/3000 [08:42<02:17, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2405/3000 [08:42<02:07, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2407/3000 [08:43<02:43, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2409/3000 [08:43<02:30, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2412/3000 [08:43<01:31, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2416/3000 [08:44<01:11, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [08:45<02:14, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2422/3000 [08:45<01:37, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2424/3000 [08:46<01:41, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2426/3000 [08:47<02:53, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2428/3000 [08:48<02:42, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2429/3000 [08:48<02:32, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2430/3000 [08:48<02:29, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2436/3000 [08:49<01:20, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2437/3000 [08:49<01:39, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2439/3000 [08:49<01:46, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2440/3000 [08:50<02:02, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2442/3000 [08:50<02:21, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2443/3000 [08:51<02:18, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2444/3000 [08:51<02:30, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2445/3000 [08:52<03:15, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2449/3000 [08:52<01:46, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2452/3000 [08:53<01:46, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2458/3000 [08:53<01:10, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2460/3000 [08:53<01:09, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2462/3000 [08:54<01:58, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2463/3000 [08:55<02:11, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2466/3000 [08:56<02:23, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2468/3000 [08:56<02:01, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2469/3000 [08:56<02:04, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2471/3000 [08:57<02:49, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [08:58<02:02, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▎ | 2475/3000 [08:58<01:51, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2476/3000 [08:59<02:36, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2477/3000 [08:59<03:05, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2480/3000 [09:00<02:40, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2483/3000 [09:00<01:51, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2484/3000 [09:01<01:53, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2486/3000 [09:01<01:55, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2487/3000 [09:01<01:52, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [09:02<03:37, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2490/3000 [09:03<02:47, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2492/3000 [09:03<02:08, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2493/3000 [09:03<02:19, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2496/3000 [09:04<01:39, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2497/3000 [09:04<01:52, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2499/3000 [09:04<01:49, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2503/3000 [09:05<01:17, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2505/3000 [09:06<01:45, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2506/3000 [09:07<02:42, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2508/3000 [09:07<02:08, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2512/3000 [09:07<01:16, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2514/3000 [09:08<01:04, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2516/3000 [09:08<01:46, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2518/3000 [09:09<01:38, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2520/3000 [09:09<01:16, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2521/3000 [09:10<02:28, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2522/3000 [09:10<02:29, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2525/3000 [09:11<01:49, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2528/3000 [09:11<01:17, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2530/3000 [09:11<01:12, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2532/3000 [09:12<01:12, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2534/3000 [09:12<01:41, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2536/3000 [09:12<01:19, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2538/3000 [09:13<01:06, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2540/3000 [09:13<01:18, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2541/3000 [09:13<01:27, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2542/3000 [09:14<02:12, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2543/3000 [09:14<02:33, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2546/3000 [09:15<02:02, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2548/3000 [09:15<01:31, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2551/3000 [09:16<01:23, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2554/3000 [09:16<01:07, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2555/3000 [09:17<01:38, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2557/3000 [09:17<01:29, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2559/3000 [09:17<01:28, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2560/3000 [09:19<03:02, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2562/3000 [09:19<02:30, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2564/3000 [09:19<01:39, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2567/3000 [09:20<01:17, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2569/3000 [09:20<00:58, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [09:20<01:20, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2573/3000 [09:21<01:17, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2575/3000 [09:21<01:28, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2577/3000 [09:22<01:08, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2578/3000 [09:22<01:58, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [09:23<02:55, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2580/3000 [09:23<02:31, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2582/3000 [09:24<02:12, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2584/3000 [09:24<01:36, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2587/3000 [09:24<01:03, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2589/3000 [09:25<00:55, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2590/3000 [09:25<01:03, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2593/3000 [09:25<00:49, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2594/3000 [09:25<01:05, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2597/3000 [09:26<00:58, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2598/3000 [09:27<02:30, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2599/3000 [09:28<02:31, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2600/3000 [09:28<02:21, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2601/3000 [09:28<02:23, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2602/3000 [09:28<02:06, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2603/3000 [09:29<02:43, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2606/3000 [09:30<01:59, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2607/3000 [09:30<01:50, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2608/3000 [09:30<01:45, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2610/3000 [09:31<01:24, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2611/3000 [09:31<01:35, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2615/3000 [09:31<00:50, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2617/3000 [09:32<01:03, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2619/3000 [09:32<01:08, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2621/3000 [09:33<01:09, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2622/3000 [09:33<01:45, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2625/3000 [09:34<01:24, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2628/3000 [09:34<01:09, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2630/3000 [09:35<01:13, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2631/3000 [09:35<01:13, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2633/3000 [09:35<01:18, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2635/3000 [09:36<01:35, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2638/3000 [09:37<01:11, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2640/3000 [09:37<01:20, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2641/3000 [09:37<01:14, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2644/3000 [09:38<01:01, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2646/3000 [09:38<01:10, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2647/3000 [09:39<01:22, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2650/3000 [09:39<01:15, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2651/3000 [09:40<01:32, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2652/3000 [09:40<01:42, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2653/3000 [09:41<01:43, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2656/3000 [09:41<01:14, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2657/3000 [09:41<01:09, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [09:42<01:26, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2659/3000 [09:42<01:35, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2661/3000 [09:43<01:22, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [09:43<01:16, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2663/3000 [09:43<01:25, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2664/3000 [09:44<02:11, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2666/3000 [09:44<01:38, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2668/3000 [09:45<01:32, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2670/3000 [09:45<01:21, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2672/3000 [09:45<00:55, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2673/3000 [09:45<00:57, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2675/3000 [09:46<00:53, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2678/3000 [09:46<00:34, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2680/3000 [09:46<00:56, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2681/3000 [09:47<01:12, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2682/3000 [09:47<01:15, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2684/3000 [09:48<01:23, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2685/3000 [09:48<01:27, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2686/3000 [09:49<02:04, 2.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2688/3000 [09:49<01:26, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2691/3000 [09:50<00:59, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2693/3000 [09:50<00:52, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2695/3000 [09:50<00:58, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [09:51<01:07, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2699/3000 [09:51<01:07, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2700/3000 [09:52<01:13, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2703/3000 [09:53<01:17, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2704/3000 [09:53<01:37, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2705/3000 [09:53<01:27, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2706/3000 [09:54<01:29, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2711/3000 [09:54<00:37, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2713/3000 [09:54<00:40, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2716/3000 [09:55<01:04, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2719/3000 [09:56<00:58, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2720/3000 [09:56<00:53, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2723/3000 [09:57<00:49, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2725/3000 [09:57<00:54, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2726/3000 [09:57<00:50, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2727/3000 [09:58<01:26, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2729/3000 [09:58<01:06, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2730/3000 [09:59<01:04, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2732/3000 [09:59<00:51, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2733/3000 [09:59<00:59, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2736/3000 [10:00<00:44, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2738/3000 [10:00<00:44, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2742/3000 [10:00<00:38, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2744/3000 [10:01<00:59, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [10:01<01:02, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2746/3000 [10:02<01:08, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2749/3000 [10:03<01:01, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2750/3000 [10:03<00:56, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2753/3000 [10:03<00:45, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2757/3000 [10:04<00:31, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2758/3000 [10:04<00:36, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2759/3000 [10:04<00:56, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2761/3000 [10:05<00:50, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2762/3000 [10:05<01:12, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2763/3000 [10:06<01:07, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2767/3000 [10:06<00:45, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2768/3000 [10:06<00:46, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2770/3000 [10:07<00:59, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2772/3000 [10:08<00:53, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2773/3000 [10:08<00:56, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2776/3000 [10:08<00:37, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2780/3000 [10:09<00:28, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [10:09<00:43, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2782/3000 [10:10<00:54, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2784/3000 [10:10<00:48, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2786/3000 [10:11<00:47, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2788/3000 [10:11<00:45, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2792/3000 [10:11<00:34, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2794/3000 [10:12<00:30, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2795/3000 [10:12<00:53, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2797/3000 [10:13<00:57, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2798/3000 [10:13<00:59, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2800/3000 [10:14<00:47, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2802/3000 [10:14<00:38, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2803/3000 [10:14<00:46, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2805/3000 [10:14<00:37, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2806/3000 [10:15<00:47, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2809/3000 [10:15<00:41, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2810/3000 [10:16<00:38, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2812/3000 [10:16<00:32, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2813/3000 [10:16<00:35, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2815/3000 [10:17<00:37, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2817/3000 [10:17<00:37, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2818/3000 [10:17<00:43, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2820/3000 [10:18<00:36, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [10:18<00:40, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2823/3000 [10:18<00:31, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2824/3000 [10:18<00:34, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2825/3000 [10:19<01:01, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2827/3000 [10:20<00:47, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2829/3000 [10:20<00:31, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2830/3000 [10:20<00:33, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2831/3000 [10:20<00:33, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2833/3000 [10:20<00:27, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [10:21<00:50, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2836/3000 [10:22<00:39, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2837/3000 [10:22<00:58, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [10:23<00:49, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2842/3000 [10:23<00:33, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [10:24<00:32, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2847/3000 [10:24<00:31, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2849/3000 [10:25<00:29, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2850/3000 [10:25<00:38, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2855/3000 [10:25<00:18, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2857/3000 [10:26<00:26, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2858/3000 [10:26<00:33, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2859/3000 [10:27<00:36, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2860/3000 [10:27<00:34, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2862/3000 [10:27<00:30, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2863/3000 [10:28<00:32, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2866/3000 [10:28<00:28, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2867/3000 [10:29<00:29, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2868/3000 [10:29<00:32, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2872/3000 [10:29<00:17, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2873/3000 [10:30<00:23, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2876/3000 [10:30<00:22, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2878/3000 [10:31<00:24, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2882/3000 [10:31<00:19, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2884/3000 [10:32<00:24, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2885/3000 [10:32<00:24, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2888/3000 [10:33<00:20, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2892/3000 [10:33<00:16, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2894/3000 [10:34<00:19, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2895/3000 [10:34<00:21, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2898/3000 [10:34<00:18, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2899/3000 [10:35<00:24, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2901/3000 [10:35<00:19, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2902/3000 [10:36<00:23, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2904/3000 [10:36<00:21, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2905/3000 [10:37<00:26, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2907/3000 [10:37<00:23, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2911/3000 [10:37<00:13, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2913/3000 [10:38<00:19, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2914/3000 [10:38<00:19, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2915/3000 [10:39<00:19, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2916/3000 [10:39<00:21, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2918/3000 [10:39<00:18, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2919/3000 [10:40<00:19, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2921/3000 [10:40<00:16, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2923/3000 [10:40<00:13, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2928/3000 [10:41<00:09, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2930/3000 [10:41<00:09, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2931/3000 [10:41<00:14, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2933/3000 [10:42<00:13, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2934/3000 [10:43<00:20, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2936/3000 [10:43<00:15, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2938/3000 [10:43<00:12, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2939/3000 [10:43<00:12, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2940/3000 [10:44<00:12, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2942/3000 [10:44<00:11, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2946/3000 [10:44<00:08, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2949/3000 [10:45<00:05, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2951/3000 [10:45<00:06, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [10:45<00:07, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [10:46<00:09, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2954/3000 [10:46<00:12, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2956/3000 [10:46<00:10, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2958/3000 [10:47<00:09, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2960/3000 [10:47<00:08, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2962/3000 [10:48<00:08, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2965/3000 [10:48<00:05, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2966/3000 [10:48<00:04, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2969/3000 [10:49<00:04, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2971/3000 [10:50<00:06, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2973/3000 [10:50<00:04, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2974/3000 [10:50<00:06, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2975/3000 [10:50<00:06, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2979/3000 [10:51<00:03, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2981/3000 [10:51<00:03, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2982/3000 [10:52<00:04, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2985/3000 [10:52<00:02, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2986/3000 [10:52<00:02, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2988/3000 [10:53<00:02, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2990/3000 [10:53<00:02, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2994/3000 [10:54<00:00, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2998/3000 [10:54<00:00, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [10:54<00:00, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:34:57.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (before optimization): {'f1': 0.0003333333333333333, 'em': 0.0003333333333333333, 'acc': 0.5063333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 08:34:57.469\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mOptimizing workflow...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0/20 [00:00, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:34:57.473\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:35:23.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:36:59.559\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:37:06.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:37:06.538\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 1 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:49, 1.54s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:02<03:32, 1.44s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:14, 1.94it/s]\u001b[A\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:47, 3.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:37, 3.79it/s]\u001b[A\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:27, 5.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:24, 5.50it/s]\u001b[A\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:24, 5.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█▏ | 17/150 [00:04<00:20, 6.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:05<00:21, 5.99it/s]\u001b[A\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:05<00:21, 6.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:05<00:23, 5.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:06<00:40, 3.11it/s]\u001b[A\n",
"Evaluating workflow: 16%|█▌ | 24/150 [00:06<00:35, 3.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:06<00:33, 3.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:07<00:32, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:07<00:27, 4.39it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:07<00:13, 8.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:08<00:15, 7.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:14, 7.95it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:11, 9.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:09<00:19, 5.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:10<00:29, 3.59it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:10<00:23, 4.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:11<00:25, 4.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:11<00:22, 4.58it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:11<00:17, 5.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:11<00:18, 5.18it/s]\u001b[A\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:11<00:18, 5.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:12<00:14, 6.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:12<00:13, 6.94it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:12<00:14, 6.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 59/150 [00:12<00:13, 6.73it/s]\u001b[A\n",
"Evaluating workflow: 41%|████ | 61/150 [00:12<00:10, 8.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:13<00:12, 7.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:14<00:23, 3.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:14<00:19, 4.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:14<00:24, 3.36it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:15<00:18, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:15<00:18, 4.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:15<00:16, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:16<00:14, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:16<00:12, 5.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:16<00:10, 6.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:16<00:09, 7.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:17<00:16, 4.10it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:17<00:12, 5.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:18<00:11, 5.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:18<00:13, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:18<00:11, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:19<00:16, 3.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:19<00:15, 3.77it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:19<00:08, 6.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:20<00:07, 6.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:20<00:08, 6.07it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:20<00:05, 9.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:21<00:09, 5.11it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 105/150 [00:21<00:07, 5.98it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:21<00:07, 5.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:22<00:10, 3.93it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:22<00:09, 4.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:22<00:08, 4.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:23<00:09, 3.98it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:23<00:08, 4.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:23<00:04, 7.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:23<00:04, 7.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:24<00:04, 6.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:24<00:05, 5.40it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:24<00:05, 5.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:24<00:04, 6.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:24<00:05, 5.07it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:25<00:03, 7.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:25<00:03, 7.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:25<00:06, 3.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:26<00:06, 3.43it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:26<00:05, 3.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:26<00:04, 4.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:26<00:04, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:27<00:04, 3.62it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:27<00:02, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:27<00:03, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:28<00:04, 3.09it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:28<00:03, 3.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:28<00:01, 6.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:29<00:01, 5.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:29<00:01, 5.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:29<00:00, 5.10it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:30<00:00, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:30<00:00, 5.91it/s]\u001b[A\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:30<00:00, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:30<00:00, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:37:37.574\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 1 metrics: {'f1': 0.006666666666666667, 'em': 0.006666666666666667, 'acc': 0.49333333333333335}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 5%|▌ | 1/20 [02:40<50:41, 160.10s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:37:37.575\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:37:58.281\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:39:46.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:39:54.283\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:39:54.284\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 2 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<02:36, 1.05s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<02:25, 1.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:02<02:22, 1.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:03<01:42, 1.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:26, 1.68it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:29, 4.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:25, 5.48it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:21, 6.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:14, 9.43it/s]\u001b[A\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:04<00:10, 11.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:05<00:20, 6.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:05<00:27, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:06<00:35, 3.57it/s]\u001b[A\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:06<00:32, 3.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:07<00:30, 3.97it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:07<00:27, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:07<00:21, 5.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:07<00:19, 6.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:08<00:17, 6.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:08<00:18, 6.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:08<00:17, 6.63it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:12, 8.74it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:08<00:15, 7.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:09<00:20, 5.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:09<00:24, 4.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:10<00:26, 3.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:10<00:23, 4.32it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:10<00:17, 5.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:10<00:17, 5.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:11<00:21, 4.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:11<00:17, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:12<00:22, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:12<00:28, 3.29it/s]\u001b[A\n",
"Evaluating workflow: 40%|████ | 60/150 [00:13<00:14, 6.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:13<00:12, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:13<00:15, 5.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:13<00:16, 5.28it/s]\u001b[A\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:14<00:15, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:14<00:21, 3.84it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:14<00:15, 5.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:15<00:16, 4.97it/s]\u001b[A\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:15<00:10, 7.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:15<00:08, 8.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:16<00:15, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:16<00:16, 4.40it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:16<00:15, 4.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:17<00:12, 5.35it/s]\u001b[A\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:17<00:14, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:17<00:14, 4.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:17<00:14, 4.44it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:17<00:10, 5.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:18<00:10, 6.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:18<00:11, 5.56it/s]\u001b[A\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:18<00:07, 7.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:18<00:07, 7.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:18<00:08, 6.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:19<00:11, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:19<00:09, 5.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:20<00:12, 4.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:20<00:12, 4.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:21<00:11, 4.12it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:21<00:08, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:21<00:06, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:21<00:06, 6.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:21<00:05, 7.24it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:21<00:04, 8.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:22<00:04, 8.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:22<00:07, 4.96it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:23<00:06, 5.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:23<00:08, 3.80it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:23<00:06, 5.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:23<00:04, 6.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:24<00:04, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:24<00:06, 3.93it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:24<00:05, 4.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:25<00:03, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:25<00:02, 8.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:25<00:02, 6.08it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:26<00:02, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:26<00:02, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:28<00:05, 2.48it/s]\u001b[A\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:28<00:04, 2.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:28<00:02, 3.63it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:28<00:02, 4.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:28<00:01, 5.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:29<00:00, 5.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:30<00:00, 4.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:30<00:00, 4.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:31<00:00, 4.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:40:25.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 2 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.43333333333333335}\u001b[0m\n",
"\u001b[32m2026-01-13 08:40:25.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.006666666666666667, 'em': 0.006666666666666667, 'acc': 0.49333333333333335}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 10%|█ | 2/20 [05:28<49:26, 164.82s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:40:25.691\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:40:43.707\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:42:36.435\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:42:45.216\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:42:45.217\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 3 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:00<02:22, 1.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:02<02:32, 1.03s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:03<02:35, 1.06s/it]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:03<01:39, 1.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:27, 1.66it/s]\u001b[A\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<01:03, 2.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:04<00:53, 2.69it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:04<00:33, 4.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:04<00:19, 7.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:15, 8.52it/s]\u001b[A\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:14, 9.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:05<00:16, 7.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:05<00:15, 8.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:05<00:24, 5.23it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 23/150 [00:06<00:24, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:07<00:34, 3.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:07<00:37, 3.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:07<00:35, 3.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:08<00:27, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:08<00:20, 5.59it/s]\u001b[A\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:08<00:14, 7.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:12, 9.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:09<00:19, 5.50it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:09<00:19, 5.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:10<00:22, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:10<00:19, 5.51it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:10<00:17, 5.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:10<00:15, 6.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:11<00:23, 4.23it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:11<00:21, 4.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:11<00:21, 4.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:12<00:21, 4.63it/s]\u001b[A\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:12<00:18, 5.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:12<00:17, 5.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:12<00:18, 5.07it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:12<00:17, 5.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:13<00:18, 4.86it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:13<00:17, 5.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:13<00:18, 4.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:13<00:12, 6.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:14<00:20, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:14<00:22, 3.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:15<00:16, 5.02it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:15<00:16, 5.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:15<00:17, 4.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:15<00:13, 5.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:16<00:12, 6.30it/s]\u001b[A\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:16<00:11, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:16<00:13, 5.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:16<00:16, 4.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:17<00:21, 3.25it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:17<00:14, 4.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:18<00:15, 4.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:18<00:12, 5.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:19<00:12, 4.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:19<00:12, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:19<00:10, 5.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:19<00:10, 5.49it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:19<00:08, 7.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:20<00:11, 4.79it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:20<00:10, 5.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:20<00:08, 6.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:20<00:08, 6.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:21<00:11, 4.40it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:21<00:08, 5.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:21<00:08, 5.85it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:22<00:07, 6.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:22<00:10, 4.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:22<00:11, 3.86it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:22<00:10, 4.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:23<00:06, 6.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:23<00:09, 4.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:24<00:07, 4.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:24<00:06, 5.78it/s]\u001b[A\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:24<00:04, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:25<00:05, 5.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:25<00:05, 5.92it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:25<00:04, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:25<00:06, 4.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:25<00:06, 4.28it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:26<00:04, 6.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:26<00:05, 4.54it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:26<00:04, 4.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:27<00:04, 5.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:27<00:05, 3.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:27<00:03, 5.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:28<00:03, 4.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:28<00:02, 6.08it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:28<00:02, 6.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:28<00:01, 7.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:29<00:02, 3.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:29<00:01, 4.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:30<00:01, 4.17it/s]\u001b[A\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:30<00:00, 5.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:30<00:00, 5.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:31<00:00, 4.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:31<00:00, 4.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-13 08:43:16.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 3 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5533333333333333}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 15%|█▌ | 3/20 [08:19<47:31, 167.74s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:43:16.904\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:43:36.771\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:45:34.979\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:45:46.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:45:46.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 4 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:00<02:17, 1.08it/s]\u001b[A\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<01:08, 2.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:02<02:08, 1.14it/s]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:24, 1.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<01:11, 2.00it/s]\u001b[A\n",
"Evaluating workflow: 5%|▌ | 8/150 [00:03<00:44, 3.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:46, 3.05it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:21, 6.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 15/150 [00:04<00:18, 7.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:04<00:15, 8.61it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:10, 12.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:05<00:12, 10.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:05<00:12, 9.56it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:05<00:12, 9.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:06<00:23, 5.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:06<00:24, 4.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:34, 3.42it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:07<00:29, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:07<00:27, 4.14it/s]\u001b[A\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:07<00:25, 4.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:20, 5.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:21, 5.08it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:08<00:16, 6.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:08<00:17, 6.16it/s]\u001b[A\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:08<00:13, 8.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:09<00:10, 9.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:09<00:11, 8.58it/s]\u001b[A\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:09<00:08, 11.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:09<00:09, 10.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:09<00:08, 11.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:11<00:23, 3.94it/s]\u001b[A\n",
"Evaluating workflow: 40%|████ | 60/150 [00:11<00:21, 4.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:11<00:19, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:11<00:16, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:12<00:18, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:12<00:16, 4.93it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:12<00:13, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:12<00:10, 7.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:13<00:11, 6.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:13<00:08, 8.42it/s]\u001b[A\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:13<00:06, 11.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:14<00:12, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:14<00:12, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:15<00:14, 4.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:15<00:15, 4.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:15<00:13, 4.76it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:16<00:13, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:16<00:13, 4.54it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:16<00:06, 9.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:16<00:05, 9.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:17<00:08, 6.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:17<00:06, 8.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:17<00:05, 8.16it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:17<00:05, 7.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:18<00:07, 5.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:18<00:08, 5.25it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:18<00:08, 5.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:19<00:07, 5.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:19<00:11, 3.61it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:19<00:09, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:20<00:09, 3.95it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:20<00:08, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:20<00:04, 8.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:20<00:04, 7.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:20<00:05, 6.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:21<00:06, 4.88it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:21<00:05, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:21<00:05, 5.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:22<00:04, 6.03it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:22<00:03, 7.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:22<00:03, 6.84it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:22<00:02, 7.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:22<00:02, 7.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:23<00:03, 4.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:23<00:04, 4.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:23<00:03, 4.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:24<00:02, 6.38it/s]\u001b[A\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:24<00:01, 6.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:25<00:03, 3.30it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:25<00:01, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:25<00:01, 6.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:25<00:00, 6.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:26<00:00, 7.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:26<00:00, 5.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:46:12.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 4 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.4666666666666667}\u001b[0m\n",
"\u001b[32m2026-01-13 08:46:12.640\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5533333333333333}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 20%|██ | 4/20 [11:15<45:34, 170.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:46:12.644\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:46:33.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:48:20.046\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:48:30.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:48:30.850\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 5 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:24, 1.78s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:02<03:31, 1.43s/it]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:03<02:00, 1.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:03<01:46, 1.37it/s]\u001b[A\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<00:54, 2.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:23, 6.03it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:15, 8.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:17, 7.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:20, 6.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:05<00:23, 5.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:06<00:48, 2.68it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:06<00:42, 3.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:07<00:44, 2.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:07<00:36, 3.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:08<00:29, 4.20it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:08<00:25, 4.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:08<00:24, 4.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:08<00:24, 4.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:09<00:19, 5.80it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:09<00:19, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:09<00:14, 7.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:09<00:14, 7.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:10<00:30, 3.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:11<00:30, 3.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:11<00:30, 3.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:11<00:23, 4.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:11<00:24, 4.25it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:12<00:18, 5.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:12<00:11, 8.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:13<00:21, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:13<00:16, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:13<00:18, 5.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:13<00:13, 6.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:14<00:15, 5.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:14<00:13, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:15<00:15, 5.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:15<00:15, 5.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:15<00:17, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:15<00:20, 4.03it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:16<00:14, 5.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:16<00:14, 5.60it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:16<00:13, 5.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:16<00:16, 4.62it/s]\u001b[A\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:17<00:14, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:17<00:13, 5.41it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:17<00:13, 5.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:17<00:12, 5.68it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:17<00:11, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:17<00:11, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:18<00:12, 5.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:18<00:09, 7.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:19<00:13, 4.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:19<00:14, 4.35it/s]\u001b[A\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:19<00:12, 4.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:19<00:10, 5.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:20<00:11, 5.23it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:20<00:10, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:20<00:12, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:20<00:13, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:21<00:12, 4.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:21<00:13, 3.91it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:21<00:11, 4.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:22<00:06, 7.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:22<00:06, 7.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:22<00:05, 7.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:22<00:07, 5.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:23<00:09, 4.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:23<00:08, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:23<00:09, 4.33it/s]\u001b[A\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:23<00:07, 4.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:24<00:08, 4.29it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:24<00:07, 4.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:25<00:05, 6.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:25<00:04, 7.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:25<00:04, 6.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:26<00:06, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:26<00:04, 5.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:26<00:03, 5.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:27<00:04, 5.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:27<00:05, 3.87it/s]\u001b[A\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:28<00:04, 4.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:28<00:05, 3.06it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:28<00:02, 5.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:29<00:02, 6.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:29<00:01, 6.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:29<00:01, 7.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:29<00:00, 7.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:30<00:00, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:30<00:00, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:30<00:00, 4.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:31<00:00, 3.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:31<00:00, 4.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:49:02.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 5 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.43333333333333335}\u001b[0m\n",
"\u001b[32m2026-01-13 08:49:02.688\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5533333333333333}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 25%|██▌ | 5/20 [14:05<42:38, 170.59s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:49:02.692\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:49:24.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:51:18.148\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:51:26.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:51:26.701\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 6 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:31, 1.82s/it]\u001b[A\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<02:02, 1.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:03<02:33, 1.04s/it]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:03<01:40, 1.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<00:52, 2.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:24, 5.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:04<00:23, 5.80it/s]\u001b[A\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:19, 7.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:13, 9.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:04<00:12, 10.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:04<00:13, 9.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 24/150 [00:06<00:31, 4.03it/s]\u001b[A\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:06<00:42, 2.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:07<00:36, 3.41it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:07<00:33, 3.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:07<00:20, 5.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:20, 5.83it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:08<00:18, 6.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:08<00:17, 6.39it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:08<00:14, 7.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:10, 10.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:08<00:08, 13.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:10<00:25, 4.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:11<00:31, 3.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:11<00:29, 3.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:12<00:28, 3.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:12<00:19, 4.92it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:12<00:12, 7.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:12<00:11, 7.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:13<00:16, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:13<00:15, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:14<00:17, 4.77it/s]\u001b[A\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:14<00:16, 4.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:14<00:20, 4.02it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:15<00:14, 5.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:15<00:14, 5.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:15<00:24, 3.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:16<00:23, 3.24it/s]\u001b[A\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:16<00:19, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:17<00:17, 4.22it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:17<00:11, 6.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:17<00:08, 7.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:18<00:14, 4.33it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:18<00:14, 4.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:19<00:12, 4.84it/s]\u001b[A\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:19<00:09, 6.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:19<00:13, 4.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:20<00:14, 3.82it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:20<00:10, 5.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:20<00:09, 5.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:21<00:10, 4.80it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:21<00:10, 5.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:21<00:06, 6.90it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:21<00:07, 6.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:21<00:05, 7.60it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:21<00:05, 7.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:22<00:10, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:23<00:12, 3.35it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:23<00:10, 3.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:23<00:09, 4.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:24<00:06, 5.59it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:24<00:06, 5.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:24<00:05, 5.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:24<00:05, 5.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:25<00:04, 6.22it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:25<00:04, 6.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:25<00:04, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:25<00:06, 4.15it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:26<00:05, 4.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:26<00:09, 2.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:27<00:06, 3.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:27<00:06, 3.16it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:27<00:05, 3.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:27<00:05, 3.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:28<00:06, 2.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:28<00:06, 2.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:29<00:04, 3.81it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:29<00:03, 4.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:29<00:01, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:29<00:01, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:30<00:01, 7.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:30<00:01, 6.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:30<00:01, 5.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:30<00:00, 6.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:31<00:00, 3.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:32<00:00, 2.56it/s]\u001b[A\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:32<00:00, 4.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:51:59.579\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 6 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.4866666666666667}\u001b[0m\n",
"\u001b[32m2026-01-13 08:51:59.580\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5533333333333333}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 30%|███ | 6/20 [17:02<40:18, 172.73s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:51:59.583\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:52:22.894\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:54:27.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:54:38.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:54:38.520\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 7 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:03, 1.64s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:02<02:55, 1.19s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:21, 1.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:10, 2.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:51, 2.75it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:35, 3.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:25, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:17, 7.84it/s]\u001b[A\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:15, 8.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:18, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:05<00:19, 6.49it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:05<00:19, 6.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:05<00:19, 6.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:06<00:21, 5.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:06<00:30, 4.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:07<00:35, 3.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:07<00:33, 3.60it/s]\u001b[A\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:07<00:19, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:07<00:13, 8.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:08<00:15, 7.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:15, 7.26it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:08<00:12, 8.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:09<00:19, 5.32it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:09<00:15, 6.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:10<00:19, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:10<00:16, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:10<00:17, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:11<00:21, 4.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:11<00:14, 6.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:11<00:16, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:12<00:11, 7.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:12<00:13, 6.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:12<00:12, 6.74it/s]\u001b[A\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:12<00:12, 6.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:13<00:16, 4.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:13<00:13, 5.75it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:13<00:10, 7.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:13<00:10, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:14<00:18, 4.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:15<00:18, 3.93it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:15<00:17, 4.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:15<00:12, 5.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:15<00:11, 6.12it/s]\u001b[A\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:15<00:08, 7.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 85/150 [00:16<00:11, 5.71it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:16<00:08, 7.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:16<00:09, 6.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:17<00:10, 5.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:17<00:10, 5.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:17<00:12, 4.59it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:17<00:11, 5.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:17<00:08, 6.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:18<00:09, 5.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:18<00:08, 5.74it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:18<00:08, 6.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:19<00:10, 4.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:19<00:10, 4.40it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:19<00:07, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:19<00:06, 6.51it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:19<00:07, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:20<00:05, 7.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:20<00:06, 6.55it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:20<00:05, 7.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:20<00:08, 4.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:21<00:06, 5.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:21<00:06, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:22<00:09, 3.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:22<00:09, 3.58it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:22<00:07, 4.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:22<00:07, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:23<00:09, 3.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:23<00:05, 4.76it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:23<00:02, 8.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:24<00:02, 7.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:24<00:02, 6.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:24<00:03, 5.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:25<00:03, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:25<00:02, 4.91it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:25<00:02, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:26<00:02, 4.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:27<00:03, 3.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:27<00:02, 3.36it/s]\u001b[A\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:27<00:02, 3.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:27<00:01, 4.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:28<00:00, 4.94it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:28<00:00, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:28<00:00, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:29<00:00, 2.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:29<00:00, 5.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:55:08.397\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 7 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5133333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 08:55:08.398\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5533333333333333}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 35%|███▌ | 7/20 [20:10<38:33, 177.99s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:55:08.401\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:55:27.245\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 08:57:16.317\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 08:57:29.023\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 08:57:29.024\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 8 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:54, 1.57s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<01:53, 1.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:17, 1.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:30, 1.60it/s]\u001b[A\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:50, 2.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:35, 4.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:29, 4.67it/s]\u001b[A\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:19, 6.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:14, 9.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:04<00:13, 9.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:04<00:13, 9.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 24/150 [00:05<00:20, 6.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:06<00:23, 5.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:06<00:33, 3.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:07<00:33, 3.63it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 32/150 [00:07<00:21, 5.38it/s]\u001b[A\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:25, 4.50it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:07<00:19, 5.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:12, 8.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:08<00:14, 7.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:08<00:12, 8.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:09<00:15, 6.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:09<00:18, 5.63it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:09<00:14, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:09<00:13, 7.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:10<00:19, 4.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:11<00:24, 3.95it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:11<00:18, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:11<00:20, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:11<00:14, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:12<00:12, 6.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:12<00:09, 9.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:12<00:08, 10.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:13<00:11, 6.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:13<00:11, 7.17it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:13<00:10, 7.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:13<00:11, 6.87it/s]\u001b[A\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:13<00:11, 6.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:13<00:09, 7.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:14<00:13, 5.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:14<00:13, 5.28it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:14<00:13, 5.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:14<00:12, 5.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:15<00:13, 4.89it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:15<00:09, 7.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:15<00:12, 5.08it/s]\u001b[A\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:16<00:09, 6.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:16<00:09, 6.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:16<00:12, 4.67it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:16<00:07, 7.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:17<00:05, 9.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:17<00:06, 8.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:17<00:05, 8.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:18<00:09, 5.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:18<00:09, 5.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:18<00:10, 4.27it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:19<00:07, 5.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:19<00:08, 5.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:19<00:08, 5.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:20<00:10, 3.98it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:20<00:07, 5.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:20<00:05, 7.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:20<00:04, 7.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:20<00:04, 7.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:21<00:04, 6.42it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:21<00:04, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:21<00:03, 7.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:21<00:03, 7.14it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:21<00:03, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:21<00:02, 9.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:22<00:04, 4.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:23<00:04, 4.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:23<00:05, 3.96it/s]\u001b[A\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:23<00:04, 4.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:23<00:03, 4.58it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:24<00:03, 5.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:24<00:02, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:24<00:02, 4.76it/s]\u001b[A\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:24<00:01, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:24<00:01, 6.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:25<00:00, 7.39it/s]\u001b[A\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:25<00:00, 7.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:25<00:00, 6.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:25<00:00, 6.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:26<00:00, 3.99it/s]\u001b[A\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:26<00:00, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:26<00:00, 3.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:27<00:00, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 08:57:56.635\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 8 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5666666666666667}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 40%|████ | 8/20 [22:59<34:58, 174.89s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 08:57:56.636\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 08:58:14.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:00:05.102\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:00:17.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:00:17.953\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 9 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:42, 1.49s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<01:59, 1.24it/s]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:01<00:50, 2.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:02<00:46, 3.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<01:20, 1.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:41, 3.42it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:03<00:25, 5.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:23, 5.85it/s]\u001b[A\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:18, 7.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:17, 7.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:04<00:16, 8.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:04<00:17, 7.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 24/150 [00:05<00:16, 7.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:05<00:18, 6.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:06<00:33, 3.69it/s]\u001b[A\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:06<00:29, 4.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:06<00:30, 4.05it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:06<00:27, 4.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:06<00:23, 5.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:07<00:28, 4.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:26, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:08<00:32, 3.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:08<00:25, 4.47it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:22, 4.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:20, 5.29it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:19, 5.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:09<00:17, 6.29it/s]\u001b[A\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:09<00:12, 8.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:09<00:10, 10.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:09<00:18, 5.58it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:10<00:17, 5.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:10<00:25, 3.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:11<00:29, 3.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:11<00:32, 3.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:11<00:29, 3.33it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:11<00:15, 6.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:12<00:20, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:12<00:20, 4.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:13<00:16, 5.46it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 61/150 [00:13<00:16, 5.35it/s]\u001b[A\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:13<00:15, 5.54it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:13<00:11, 7.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:13<00:09, 8.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:14<00:11, 7.04it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:14<00:09, 8.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:14<00:13, 5.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:15<00:14, 5.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:15<00:14, 5.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:15<00:13, 5.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:16<00:14, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:16<00:14, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:16<00:14, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:17<00:11, 5.69it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:17<00:10, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:17<00:18, 3.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:18<00:14, 4.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:18<00:15, 3.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:18<00:15, 3.84it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:18<00:08, 6.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:19<00:07, 7.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:19<00:08, 6.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:19<00:07, 7.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:20<00:11, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:20<00:11, 4.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:20<00:11, 4.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:21<00:12, 3.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:21<00:11, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:21<00:07, 6.08it/s]\u001b[A\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:21<00:07, 5.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:22<00:09, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:22<00:09, 4.41it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:22<00:06, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:23<00:08, 4.12it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:23<00:05, 5.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:23<00:04, 7.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:24<00:05, 5.61it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:24<00:05, 5.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:24<00:06, 4.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:24<00:06, 4.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:25<00:03, 6.04it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:25<00:03, 5.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:25<00:03, 5.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:26<00:04, 5.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:26<00:04, 4.70it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:26<00:02, 6.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:27<00:04, 3.85it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:27<00:03, 4.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:27<00:02, 5.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:27<00:02, 4.57it/s]\u001b[A\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:27<00:02, 4.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:28<00:01, 5.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:28<00:01, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:28<00:01, 5.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:29<00:01, 3.73it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:29<00:00, 4.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:30<00:00, 4.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:30<00:00, 3.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:32<00:00, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:00:51.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 9 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5266666666666666}\u001b[0m\n",
"\u001b[32m2026-01-13 09:00:51.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5666666666666667}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 45%|████▌ | 9/20 [25:53<32:02, 174.73s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:00:51.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:01:16.020\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:03:13.373\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:03:25.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:03:25.606\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 10 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:33, 1.83s/it]\u001b[A\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<02:05, 1.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:02<01:17, 1.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:23, 1.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<01:12, 1.99it/s]\u001b[A\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:58, 2.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▌ | 8/150 [00:04<00:49, 2.85it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:04<00:41, 3.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:04<00:27, 5.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:31, 4.34it/s]\u001b[A\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:05<00:28, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:05<00:21, 6.30it/s]\u001b[A\n",
"Evaluating workflow: 11%|█▏ | 17/150 [00:05<00:21, 6.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:05<00:15, 8.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:05<00:17, 7.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:06<00:13, 9.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:07<00:22, 5.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:07<00:26, 4.54it/s]\u001b[A\n",
"Evaluating workflow: 21%|██ | 31/150 [00:07<00:23, 4.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:07<00:22, 5.20it/s]\u001b[A\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:08<00:20, 5.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:08<00:14, 8.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:15, 7.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:18, 5.98it/s]\u001b[A\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:09<00:12, 8.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:09<00:11, 9.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:09<00:17, 6.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:10<00:18, 5.54it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:10<00:18, 5.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:10<00:16, 6.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:10<00:15, 6.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:11<00:18, 5.31it/s]\u001b[A\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:11<00:17, 5.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:11<00:17, 5.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:11<00:15, 5.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:12<00:14, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:12<00:13, 6.41it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:12<00:11, 7.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:12<00:12, 6.71it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:12<00:13, 6.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:13<00:13, 6.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:13<00:16, 4.93it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:13<00:11, 7.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:13<00:11, 7.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:14<00:11, 6.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:14<00:16, 4.60it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:14<00:11, 6.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:15<00:10, 6.65it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:15<00:10, 6.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:15<00:09, 7.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:15<00:10, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:15<00:06, 9.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:16<00:10, 5.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:16<00:09, 6.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:17<00:12, 4.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:17<00:08, 6.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:17<00:09, 5.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:18<00:10, 5.33it/s]\u001b[A\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:18<00:09, 5.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:18<00:08, 5.78it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:18<00:07, 6.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:18<00:08, 6.24it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:18<00:04, 10.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 105/150 [00:19<00:06, 7.27it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:19<00:06, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:19<00:05, 7.26it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:19<00:05, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:20<00:06, 6.03it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:20<00:05, 6.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:20<00:05, 6.65it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:20<00:03, 9.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:21<00:05, 5.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:21<00:07, 4.40it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:21<00:06, 4.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:22<00:05, 5.10it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:22<00:04, 5.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:22<00:04, 6.13it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:22<00:04, 6.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:22<00:04, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:23<00:01, 9.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:23<00:01, 9.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:23<00:01, 7.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:23<00:01, 7.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:24<00:02, 6.42it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:24<00:00, 10.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:24<00:00, 7.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:25<00:01, 3.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:26<00:01, 3.17it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:26<00:00, 3.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:26<00:00, 3.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:27<00:00, 5.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:03:53.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 10 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5333333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 09:03:53.185\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5666666666666667}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 50%|█████ | 10/20 [28:55<29:30, 177.03s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:03:53.188\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:04:12.867\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:06:14.518\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:06:26.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:06:26.768\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 11 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:56, 1.99s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:02<01:26, 1.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:22, 1.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:44, 3.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:28, 4.84it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:03<00:22, 6.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 14/150 [00:03<00:21, 6.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:21, 6.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:04<00:16, 7.87it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:04<00:12, 10.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 24/150 [00:05<00:26, 4.83it/s]\u001b[A\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:05<00:21, 5.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:06<00:21, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:06<00:20, 5.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:06<00:25, 4.68it/s]\u001b[A\n",
"Evaluating workflow: 21%|██ | 31/150 [00:06<00:25, 4.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:07<00:28, 4.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:07<00:17, 6.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:07<00:19, 5.74it/s]\u001b[A\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:08<00:18, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:17, 6.23it/s]\u001b[A\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:08<00:10, 9.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:08<00:12, 8.28it/s]\u001b[A\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:08<00:10, 9.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:10<00:32, 3.10it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:10<00:28, 3.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:10<00:20, 4.81it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:10<00:13, 7.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:11<00:14, 6.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:11<00:12, 7.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:11<00:12, 7.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:11<00:11, 7.60it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:12<00:12, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:12<00:12, 7.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:12<00:17, 4.84it/s]\u001b[A\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:12<00:16, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:13<00:15, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:13<00:27, 2.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:14<00:24, 3.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:14<00:27, 2.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:14<00:19, 3.98it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:15<00:10, 6.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:15<00:09, 7.75it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:15<00:07, 9.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:15<00:06, 9.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 85/150 [00:16<00:10, 6.41it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:16<00:08, 7.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:16<00:11, 5.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:17<00:14, 4.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:17<00:18, 3.22it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:17<00:12, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:18<00:13, 4.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:18<00:16, 3.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:18<00:11, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:18<00:11, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:19<00:10, 5.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:19<00:11, 4.58it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:19<00:09, 5.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:19<00:04, 10.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:19<00:03, 11.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:20<00:05, 7.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:20<00:05, 7.47it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:20<00:05, 7.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:21<00:05, 6.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:21<00:05, 5.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:22<00:07, 4.25it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:22<00:06, 4.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:22<00:09, 3.13it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:23<00:06, 4.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:23<00:06, 4.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:23<00:07, 3.68it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:23<00:06, 4.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:24<00:05, 4.53it/s]\u001b[A\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:24<00:01, 10.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:24<00:01, 10.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:25<00:02, 6.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:25<00:01, 6.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:26<00:02, 3.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:27<00:02, 3.78it/s]\u001b[A\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:27<00:01, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:27<00:01, 3.76it/s]\u001b[A\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:27<00:01, 4.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:28<00:00, 4.83it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:28<00:00, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:29<00:00, 3.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:29<00:00, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-13 09:06:56.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 11 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5266666666666666}\u001b[0m\n",
"\u001b[32m2026-01-13 09:06:56.703\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5666666666666667}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 55%|█████▌ | 11/20 [31:59<26:51, 179.01s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:06:56.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:07:18.630\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:09:15.841\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:09:29.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:09:29.115\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 12 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:22, 1.36s/it]\u001b[A\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<01:37, 1.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:01<00:41, 3.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:02<00:40, 3.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:02<00:41, 3.45it/s]\u001b[A\n",
"Evaluating workflow: 5%|▌ | 8/150 [00:02<00:35, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:02<00:32, 4.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:44, 3.13it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:03<00:24, 5.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:03<00:23, 5.82it/s]\u001b[A\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:03<00:12, 10.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:03<00:10, 11.86it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:09, 14.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:04<00:10, 11.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:04<00:16, 7.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:05<00:21, 5.74it/s]\u001b[A\n",
"Evaluating workflow: 21%|██ | 31/150 [00:05<00:17, 6.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:05<00:15, 7.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:06<00:26, 4.33it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:06<00:21, 5.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:07<00:23, 4.85it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:07<00:14, 7.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:07<00:19, 5.60it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:07<00:12, 8.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:08<00:10, 9.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:08<00:11, 8.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:09<00:16, 5.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:09<00:17, 5.55it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:09<00:15, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:09<00:14, 6.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:10<00:21, 4.16it/s]\u001b[A\n",
"Evaluating workflow: 40%|████ | 60/150 [00:10<00:19, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:10<00:16, 5.27it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:10<00:09, 8.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:11<00:10, 7.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:11<00:11, 6.91it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:11<00:10, 7.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:11<00:09, 8.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:12<00:09, 8.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:12<00:07, 10.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:12<00:08, 8.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:12<00:09, 7.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:13<00:10, 6.34it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:13<00:06, 9.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:13<00:10, 5.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:14<00:12, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:14<00:10, 5.80it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:14<00:09, 6.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:15<00:08, 7.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:15<00:05, 8.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:15<00:07, 7.09it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:16<00:06, 7.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:16<00:05, 7.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:16<00:07, 6.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:16<00:06, 6.86it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:17<00:05, 8.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:17<00:05, 7.91it/s]\u001b[A\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:17<00:05, 7.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:18<00:09, 3.84it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:18<00:08, 4.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:18<00:07, 4.39it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:18<00:06, 5.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:18<00:05, 5.62it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:19<00:04, 7.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:19<00:03, 8.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:19<00:03, 6.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:19<00:03, 7.51it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:20<00:03, 7.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:20<00:02, 8.54it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:20<00:01, 11.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:21<00:02, 6.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:21<00:02, 6.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:21<00:02, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:22<00:02, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:22<00:02, 4.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:22<00:01, 4.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:23<00:00, 7.11it/s]\u001b[A\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:23<00:00, 6.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:23<00:00, 5.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:24<00:00, 3.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:24<00:00, 2.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:27<00:00, 5.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:09:56.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 12 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5}\u001b[0m\n",
"\u001b[32m2026-01-13 09:09:56.348\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5666666666666667}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 60%|██████ | 12/20 [34:58<23:53, 179.20s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:09:56.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:10:15.363\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:11:59.511\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:12:10.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:12:10.923\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 13 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:37, 1.46s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:01<01:09, 2.11it/s]\u001b[A\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:01<00:25, 5.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:02<00:34, 4.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:02<00:35, 3.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:40, 3.42it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:03<00:34, 4.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:03<00:23, 5.73it/s]\u001b[A\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:03<00:11, 11.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:03<00:11, 11.05it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:11, 11.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:04<00:12, 9.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:05<00:23, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:05<00:16, 7.12it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:05<00:10, 11.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:06<00:22, 5.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:07<00:22, 5.04it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:07<00:17, 6.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:07<00:14, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:07<00:17, 6.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:07<00:12, 8.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:08<00:14, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:08<00:13, 7.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:09<00:14, 6.47it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:09<00:12, 7.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:09<00:10, 8.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:09<00:12, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:10<00:18, 4.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:11<00:22, 3.81it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:11<00:16, 5.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:11<00:10, 8.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:11<00:12, 6.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:12<00:11, 6.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:12<00:10, 7.04it/s]\u001b[A\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:12<00:08, 8.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:12<00:08, 8.52it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 83/150 [00:13<00:06, 10.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:13<00:09, 7.04it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:13<00:08, 7.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:13<00:07, 8.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:15<00:13, 4.23it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:15<00:12, 4.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:15<00:09, 5.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:15<00:09, 5.58it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:15<00:06, 8.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:16<00:08, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:16<00:07, 5.92it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:17<00:06, 7.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:17<00:04, 8.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:17<00:04, 8.02it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:17<00:03, 10.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:18<00:04, 7.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:18<00:06, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:18<00:06, 5.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:19<00:06, 4.65it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:19<00:03, 7.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:19<00:03, 8.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:19<00:03, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:20<00:03, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:20<00:02, 7.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:20<00:02, 6.51it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:20<00:02, 6.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:21<00:02, 6.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:21<00:02, 7.11it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:21<00:01, 9.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:21<00:01, 10.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:21<00:01, 7.42it/s]\u001b[A\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:22<00:01, 7.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:22<00:01, 4.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:22<00:01, 4.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:23<00:01, 4.15it/s]\u001b[A\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:23<00:00, 4.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:24<00:00, 3.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:24<00:00, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:24<00:00, 6.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:12:35.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 13 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5733333333333334}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 65%|██████▌ | 13/20 [37:38<20:12, 173.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:12:35.548\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:12:53.454\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:14:39.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:14:55.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:14:55.779\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 14 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:24, 1.78s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:02<00:37, 3.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▌ | 8/150 [00:03<00:50, 2.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:38, 3.61it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:03<00:28, 4.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:03<00:26, 5.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:03<00:21, 6.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:18, 7.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:04<00:17, 7.22it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:04<00:16, 7.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:17, 7.31it/s]\u001b[A\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:04<00:13, 9.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:05<00:21, 5.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:05<00:14, 8.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:06<00:23, 4.98it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:06<00:18, 6.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:06<00:13, 8.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:07<00:23, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:07<00:21, 5.11it/s]\u001b[A\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:08<00:20, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:08<00:19, 5.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:08<00:16, 6.23it/s]\u001b[A\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:08<00:16, 6.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:09<00:15, 6.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:09<00:15, 6.32it/s]\u001b[A\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:09<00:11, 8.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:09<00:09, 9.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:09<00:10, 8.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:10<00:10, 9.01it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:10<00:11, 7.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:10<00:15, 5.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:10<00:14, 6.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:11<00:20, 4.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:12<00:19, 4.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:12<00:17, 4.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:12<00:15, 5.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:13<00:14, 5.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:13<00:12, 6.33it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:13<00:09, 7.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:13<00:08, 8.56it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:13<00:07, 9.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:13<00:06, 9.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:14<00:07, 8.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:14<00:12, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:15<00:11, 5.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:15<00:11, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:15<00:13, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:15<00:13, 4.35it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:16<00:07, 7.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:16<00:08, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:16<00:09, 5.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:17<00:14, 3.69it/s]\u001b[A\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:17<00:06, 7.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:17<00:05, 8.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:17<00:05, 7.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:18<00:06, 6.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:18<00:06, 6.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:19<00:06, 5.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:19<00:05, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:19<00:05, 6.40it/s]\u001b[A\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:19<00:05, 6.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:20<00:04, 7.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:20<00:04, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:20<00:05, 4.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:21<00:05, 4.64it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 126/150 [00:21<00:03, 6.91it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:21<00:03, 6.39it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:21<00:03, 6.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:21<00:03, 6.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:22<00:03, 5.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:22<00:04, 4.64it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:22<00:03, 4.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:23<00:04, 3.77it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:23<00:03, 4.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:23<00:02, 5.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:23<00:02, 5.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:23<00:01, 6.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:24<00:01, 5.99it/s]\u001b[A\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:24<00:01, 6.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:24<00:01, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:24<00:00, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:25<00:00, 5.00it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:25<00:00, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:25<00:00, 6.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:26<00:00, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:15:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 14 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5133333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 09:15:21.931\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5733333333333334}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 70%|███████ | 14/20 [40:24<17:06, 171.10s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:15:21.935\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:15:44.969\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:17:48.474\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:18:05.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:18:05.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 15 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:14, 1.71s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:02<01:30, 1.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:24, 1.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:03<01:24, 1.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:03<01:08, 2.12it/s]\u001b[A\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:51, 2.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:25, 5.50it/s]\u001b[A\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:25, 5.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:04<00:25, 5.38it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:04<00:25, 5.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:14, 8.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:05<00:17, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:05<00:21, 6.02it/s]\u001b[A\n",
"Evaluating workflow: 16%|█▌ | 24/150 [00:05<00:21, 5.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:06<00:20, 6.18it/s]\u001b[A\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:06<00:20, 5.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:06<00:20, 5.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:07<00:33, 3.59it/s]\u001b[A\n",
"Evaluating workflow: 21%|██ | 31/150 [00:07<00:29, 4.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:08<00:25, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:08<00:19, 5.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:08<00:13, 8.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:08<00:13, 8.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:09<00:18, 5.78it/s]\u001b[A\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:09<00:17, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:09<00:11, 9.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:09<00:18, 5.46it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:10<00:15, 6.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:10<00:12, 8.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:10<00:12, 7.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:11<00:23, 4.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:11<00:16, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:12<00:13, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:12<00:13, 6.31it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:12<00:13, 6.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:13<00:17, 4.86it/s]\u001b[A\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:13<00:12, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:13<00:16, 5.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:13<00:15, 5.08it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:14<00:11, 6.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:14<00:09, 7.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:14<00:13, 5.46it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:14<00:12, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:15<00:11, 6.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:15<00:18, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:16<00:14, 4.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:16<00:11, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:17<00:12, 5.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:17<00:13, 4.58it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:17<00:10, 5.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:17<00:09, 6.14it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:17<00:07, 7.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:18<00:08, 6.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:18<00:09, 5.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:19<00:09, 5.13it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:19<00:09, 5.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:19<00:08, 5.91it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:19<00:06, 7.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:20<00:09, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:20<00:08, 5.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:20<00:05, 7.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:20<00:05, 6.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:21<00:07, 4.96it/s]\u001b[A\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:21<00:06, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:22<00:08, 4.15it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:22<00:04, 6.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:22<00:04, 6.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:22<00:05, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:23<00:05, 5.02it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:23<00:03, 6.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:23<00:03, 6.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:23<00:03, 6.17it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:23<00:02, 7.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:24<00:03, 5.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:24<00:02, 6.79it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:24<00:02, 6.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:25<00:03, 5.14it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:25<00:02, 6.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:26<00:03, 3.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:26<00:02, 3.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:27<00:02, 3.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:27<00:02, 3.39it/s]\u001b[A\n",
"Evaluating workflow: 95%|█████████▌| 143/150 [00:27<00:01, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:27<00:00, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:27<00:00, 5.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:28<00:00, 5.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:29<00:01, 2.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:29<00:00, 2.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:30<00:00, 4.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:18:35.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 15 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.47333333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 09:18:35.784\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5733333333333334}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 75%|███████▌ | 15/20 [43:38<14:49, 177.96s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:18:35.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:18:55.442\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:20:36.211\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:20:49.108\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:20:49.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 16 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:22, 1.76s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 4/150 [00:02<01:16, 1.91it/s]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:02<01:01, 2.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 6/150 [00:02<00:50, 2.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:44, 3.22it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:28, 4.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:21, 6.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 12/150 [00:03<00:22, 6.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:04<00:29, 4.55it/s]\u001b[A\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:04<00:13, 9.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:04<00:12, 10.14it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:11, 10.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:05<00:20, 6.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:06<00:29, 4.21it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:06<00:28, 4.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:06<00:32, 3.77it/s]\u001b[A\n",
"Evaluating workflow: 20%|██ | 30/150 [00:06<00:29, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:19, 5.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:07<00:19, 6.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:07<00:22, 5.04it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:07<00:16, 6.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:07<00:11, 9.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:08<00:12, 8.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:08<00:10, 9.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:08<00:14, 7.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:09<00:18, 5.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:09<00:19, 5.30it/s]\u001b[A\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:09<00:17, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:09<00:13, 7.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▍ | 52/150 [00:10<00:19, 4.99it/s]\u001b[A\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:10<00:18, 5.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:10<00:14, 6.39it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:10<00:15, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:10<00:14, 6.50it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:10<00:10, 8.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:10<00:08, 10.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 63/150 [00:11<00:10, 8.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:11<00:13, 6.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:12<00:15, 5.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:12<00:16, 5.00it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 69/150 [00:12<00:12, 6.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:12<00:13, 5.82it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 72/150 [00:13<00:13, 5.63it/s]\u001b[A\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:13<00:10, 7.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:13<00:11, 6.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:13<00:10, 7.02it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:13<00:09, 7.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:14<00:11, 5.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 81/150 [00:14<00:18, 3.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:15<00:11, 5.77it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:15<00:09, 6.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:15<00:07, 8.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:15<00:09, 6.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:16<00:09, 5.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:16<00:14, 4.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:16<00:13, 4.12it/s]\u001b[A\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:16<00:11, 4.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:17<00:12, 4.21it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:17<00:09, 5.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:18<00:11, 4.26it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:18<00:06, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:18<00:07, 6.20it/s]\u001b[A\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:18<00:05, 7.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:19<00:06, 6.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:19<00:07, 5.10it/s]\u001b[A\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:19<00:06, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:20<00:06, 5.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:20<00:05, 6.22it/s]\u001b[A\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:20<00:05, 6.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:20<00:05, 5.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:21<00:04, 6.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:22<00:06, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:22<00:06, 4.29it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:22<00:04, 5.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:22<00:02, 8.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:23<00:02, 6.38it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:23<00:02, 6.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:23<00:01, 7.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:23<00:01, 7.84it/s]\u001b[A\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:23<00:01, 7.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:24<00:01, 5.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:25<00:01, 4.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:25<00:01, 4.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:25<00:01, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:26<00:00, 4.42it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:26<00:00, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:26<00:00, 4.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:27<00:00, 3.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:28<00:00, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:21:18.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 16 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.48}\u001b[0m\n",
"\u001b[32m2026-01-13 09:21:18.068\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.5733333333333334}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 80%|████████ | 16/20 [46:20<11:32, 173.24s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:21:18.071\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:21:35.772\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:23:26.215\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:23:37.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:23:37.637\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 17 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:05, 1.65s/it]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:01<01:08, 2.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:01<00:39, 3.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▌ | 8/150 [00:03<00:49, 2.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 9%|▉ | 14/150 [00:03<00:21, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 11%|█▏ | 17/150 [00:03<00:16, 7.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▍ | 22/150 [00:03<00:10, 12.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:04<00:17, 7.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:04<00:16, 7.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:05<00:18, 6.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:05<00:17, 6.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:05<00:17, 6.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:06<00:19, 6.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:06<00:24, 4.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:06<00:25, 4.43it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:06<00:18, 6.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:07<00:18, 6.06it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:07<00:14, 7.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:07<00:15, 7.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:07<00:13, 7.52it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:07<00:14, 7.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:07<00:07, 13.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:08<00:11, 8.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:09<00:15, 6.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:09<00:14, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:09<00:16, 5.49it/s]\u001b[A\n",
"Evaluating workflow: 40%|████ | 60/150 [00:09<00:12, 6.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:10<00:11, 7.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:10<00:18, 4.67it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:10<00:16, 5.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:11<00:12, 6.37it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:11<00:10, 7.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:11<00:07, 10.19it/s]\u001b[A\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:11<00:06, 11.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:12<00:09, 7.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:12<00:14, 4.85it/s]\u001b[A\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:13<00:10, 6.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:13<00:08, 7.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:13<00:07, 8.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:13<00:07, 7.84it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:13<00:06, 9.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:14<00:10, 5.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:14<00:10, 5.22it/s]\u001b[A\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:15<00:07, 7.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:15<00:06, 7.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:15<00:05, 9.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:16<00:06, 6.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:16<00:10, 4.36it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:16<00:05, 7.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:17<00:04, 9.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:17<00:03, 11.45it/s]\u001b[A\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:17<00:02, 12.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:17<00:04, 6.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:18<00:05, 5.47it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:18<00:04, 5.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:18<00:03, 7.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:18<00:02, 8.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:19<00:02, 7.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:19<00:03, 6.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:20<00:03, 5.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:20<00:03, 4.73it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:20<00:02, 6.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:20<00:02, 5.88it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:21<00:02, 5.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:21<00:02, 5.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:21<00:01, 7.47it/s]\u001b[A\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:21<00:00, 9.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:22<00:00, 9.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:23<00:00, 4.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:23<00:00, 6.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:24:01.450\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 17 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.58}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 85%|████████▌ | 17/20 [49:03<08:30, 170.28s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:24:01.451\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:24:20.781\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:26:07.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:26:20.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:26:20.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 18 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:37, 1.86s/it]\u001b[A\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:02<00:44, 3.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:02<00:34, 4.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:45, 3.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:52, 2.69it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:03<00:30, 4.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 11%|█ | 16/150 [00:04<00:21, 6.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:04<00:19, 6.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:04<00:14, 8.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:05<00:18, 6.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:05<00:23, 5.28it/s]\u001b[A\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:05<00:18, 6.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:06<00:18, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██ | 31/150 [00:06<00:17, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:07<00:25, 4.60it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:07<00:23, 4.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:07<00:20, 5.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:07<00:16, 6.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:07<00:16, 6.62it/s]\u001b[A\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:08<00:10, 9.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███ | 46/150 [00:08<00:16, 6.36it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:09<00:17, 5.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:09<00:20, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:09<00:21, 4.69it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:09<00:18, 5.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 35%|███▌ | 53/150 [00:10<00:18, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 55/150 [00:11<00:16, 5.61it/s]\u001b[A\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:11<00:20, 4.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:11<00:24, 3.77it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:11<00:16, 5.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:11<00:11, 7.60it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:11<00:09, 9.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:12<00:08, 9.53it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 69/150 [00:12<00:08, 9.64it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:12<00:07, 10.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:13<00:11, 6.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▉ | 74/150 [00:13<00:14, 5.23it/s]\u001b[A\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:13<00:10, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 78/150 [00:13<00:08, 8.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:14<00:10, 6.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:14<00:08, 7.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:14<00:11, 5.95it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 84/150 [00:14<00:10, 6.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:15<00:12, 5.24it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:15<00:11, 5.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:15<00:07, 7.78it/s]\u001b[A\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:15<00:08, 7.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 90/150 [00:15<00:09, 6.13it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:15<00:07, 7.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:16<00:06, 8.96it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:16<00:05, 9.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 100/150 [00:16<00:06, 7.37it/s]\u001b[A\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:17<00:06, 7.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 102/150 [00:17<00:06, 7.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:17<00:10, 4.68it/s]\u001b[A\n",
"Evaluating workflow: 69%|██████▉ | 104/150 [00:17<00:08, 5.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████ | 106/150 [00:17<00:06, 6.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:18<00:07, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:18<00:05, 7.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:18<00:07, 5.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▌ | 113/150 [00:19<00:06, 6.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 116/150 [00:19<00:04, 8.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:19<00:03, 9.19it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████▏ | 122/150 [00:19<00:02, 10.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:20<00:03, 7.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:20<00:03, 6.33it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:21<00:02, 7.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 87%|████████▋ | 130/150 [00:21<00:02, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:21<00:02, 6.67it/s]\u001b[A\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:21<00:02, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:21<00:02, 7.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:22<00:03, 4.17it/s]\u001b[A\n",
"Evaluating workflow: 91%|█████████ | 136/150 [00:22<00:03, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 138/150 [00:22<00:01, 6.02it/s]\u001b[A\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:22<00:01, 7.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:23<00:00, 8.60it/s]\u001b[A\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:23<00:00, 9.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:23<00:00, 11.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:24<00:00, 6.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:24<00:00, 6.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-13 09:26:44.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 18 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5266666666666666}\u001b[0m\n",
"\u001b[32m2026-01-13 09:26:44.806\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.58}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 90%|█████████ | 18/20 [51:47<05:36, 168.20s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:26:44.810\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:27:06.749\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:29:00.851\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:29:12.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:29:12.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 19 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<03:15, 1.31s/it]\u001b[A\n",
"Evaluating workflow: 1%|▏ | 2/150 [00:01<01:32, 1.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:01<00:29, 4.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:01<00:23, 6.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:02<00:29, 4.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 10/150 [00:03<00:42, 3.32it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:03<00:25, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 15/150 [00:03<00:19, 6.81it/s]\u001b[A\n",
"Evaluating workflow: 11%|█▏ | 17/150 [00:03<00:17, 7.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 13%|█▎ | 19/150 [00:03<00:15, 8.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 21/150 [00:03<00:13, 9.77it/s]\u001b[A\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:03<00:11, 10.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 26/150 [00:04<00:09, 13.02it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:04<00:09, 13.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:04<00:17, 6.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 21%|██▏ | 32/150 [00:05<00:15, 7.57it/s]\u001b[A\n",
"Evaluating workflow: 23%|██▎ | 34/150 [00:05<00:13, 8.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:05<00:21, 5.37it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:06<00:20, 5.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 25%|██▌ | 38/150 [00:06<00:18, 5.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 39/150 [00:06<00:20, 5.41it/s]\u001b[A\n",
"Evaluating workflow: 27%|██▋ | 41/150 [00:06<00:14, 7.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▊ | 43/150 [00:06<00:12, 8.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:06<00:12, 8.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 31%|███▏ | 47/150 [00:07<00:12, 8.45it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:07<00:11, 8.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 50/150 [00:07<00:09, 10.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:07<00:08, 10.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 56/150 [00:08<00:08, 10.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:08<00:11, 8.32it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▉ | 59/150 [00:08<00:11, 7.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:08<00:15, 5.92it/s]\u001b[A\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:09<00:11, 7.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:09<00:10, 8.52it/s]\u001b[A\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:09<00:08, 10.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 45%|████▌ | 68/150 [00:09<00:14, 5.79it/s]\u001b[A\n",
"Evaluating workflow: 47%|████▋ | 71/150 [00:10<00:09, 8.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 49%|████▊ | 73/150 [00:10<00:08, 9.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████▏ | 77/150 [00:10<00:08, 9.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:10<00:08, 8.87it/s]\u001b[A\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:11<00:05, 11.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 57%|█████▋ | 85/150 [00:11<00:07, 9.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 87/150 [00:11<00:06, 9.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:11<00:06, 8.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 61%|██████ | 91/150 [00:12<00:08, 6.92it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 93/150 [00:12<00:07, 8.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 95/150 [00:12<00:07, 6.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▍ | 97/150 [00:13<00:07, 7.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 99/150 [00:13<00:05, 8.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 67%|██████▋ | 101/150 [00:13<00:05, 8.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:13<00:05, 8.05it/s]\u001b[A\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:14<00:05, 8.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 108/150 [00:14<00:04, 9.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:14<00:04, 8.07it/s]\u001b[A\n",
"Evaluating workflow: 74%|███████▍ | 111/150 [00:14<00:05, 7.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:14<00:04, 7.66it/s]\u001b[A\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:15<00:04, 8.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:15<00:04, 7.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:16<00:06, 5.29it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:16<00:05, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:16<00:05, 5.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:16<00:06, 4.59it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:16<00:03, 7.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:17<00:03, 7.21it/s]\u001b[A\n",
"Evaluating workflow: 85%|████████▍ | 127/150 [00:17<00:02, 8.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 129/150 [00:17<00:02, 9.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 132/150 [00:17<00:02, 8.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▉ | 134/150 [00:18<00:01, 8.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:18<00:01, 9.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:18<00:01, 7.43it/s]\u001b[A\n",
"Evaluating workflow: 93%|█████████▎| 140/150 [00:18<00:01, 6.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:19<00:01, 7.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:19<00:01, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 145/150 [00:20<00:01, 4.45it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:20<00:00, 5.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:20<00:00, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:21<00:00, 7.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-13 09:29:33.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 19 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.5266666666666666}\u001b[0m\n",
"\u001b[32m2026-01-13 09:29:33.792\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.58}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 95%|█████████▌| 19/20 [54:36<02:48, 168.43s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:29:33.795\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-13 09:29:50.428\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-13 09:31:40.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-13 09:31:51.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-13 09:31:51.839\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 20 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/150 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 1%| | 1/150 [00:01<04:07, 1.66s/it]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 3/150 [00:01<01:11, 2.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 3%|▎ | 5/150 [00:02<01:10, 2.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 5%|▍ | 7/150 [00:03<00:48, 2.93it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 9/150 [00:03<00:32, 4.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 7%|▋ | 11/150 [00:03<00:25, 5.52it/s]\u001b[A\n",
"Evaluating workflow: 9%|▊ | 13/150 [00:03<00:18, 7.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 15/150 [00:03<00:16, 7.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 18/150 [00:03<00:14, 8.89it/s]\u001b[A\n",
"Evaluating workflow: 13%|█▎ | 20/150 [00:04<00:13, 9.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 15%|█▌ | 23/150 [00:04<00:11, 11.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 17%|█▋ | 25/150 [00:04<00:17, 7.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 27/150 [00:05<00:16, 7.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 19%|█▊ | 28/150 [00:05<00:19, 6.41it/s]\u001b[A\n",
"Evaluating workflow: 19%|█▉ | 29/150 [00:05<00:18, 6.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 30/150 [00:05<00:25, 4.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 33/150 [00:06<00:20, 5.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 23%|██▎ | 35/150 [00:06<00:20, 5.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 36/150 [00:06<00:21, 5.26it/s]\u001b[A\n",
"Evaluating workflow: 25%|██▍ | 37/150 [00:07<00:20, 5.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 27%|██▋ | 40/150 [00:07<00:12, 8.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 42/150 [00:07<00:11, 9.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 29%|██▉ | 44/150 [00:07<00:14, 7.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 45/150 [00:07<00:15, 6.78it/s]\u001b[A\n",
"Evaluating workflow: 31%|███ | 46/150 [00:08<00:16, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 48/150 [00:08<00:14, 7.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 33%|███▎ | 49/150 [00:08<00:14, 6.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 51/150 [00:08<00:14, 6.73it/s]\u001b[A\n",
"Evaluating workflow: 36%|███▌ | 54/150 [00:09<00:10, 8.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 37%|███▋ | 55/150 [00:09<00:16, 5.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 57/150 [00:09<00:14, 6.57it/s]\u001b[A\n",
"Evaluating workflow: 39%|███▊ | 58/150 [00:09<00:14, 6.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 60/150 [00:10<00:13, 6.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████ | 61/150 [00:10<00:13, 6.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 41%|████▏ | 62/150 [00:10<00:15, 5.71it/s]\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 64/150 [00:10<00:12, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[A\n",
"Evaluating workflow: 43%|████▎ | 65/150 [00:11<00:14, 6.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 66/150 [00:11<00:16, 5.18it/s]\u001b[A\n",
"Evaluating workflow: 45%|████▍ | 67/150 [00:11<00:15, 5.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 47%|████▋ | 70/150 [00:11<00:10, 7.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 72/150 [00:12<00:11, 6.89it/s]\u001b[A\n",
"Evaluating workflow: 50%|█████ | 75/150 [00:12<00:08, 9.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 51%|█████ | 76/150 [00:12<00:08, 8.29it/s]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 78/150 [00:12<00:08, 8.77it/s]\u001b[A\n",
"Evaluating workflow: 53%|█████▎ | 79/150 [00:12<00:10, 6.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 53%|█████▎ | 80/150 [00:13<00:12, 5.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▍ | 82/150 [00:13<00:11, 6.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 55%|█████▌ | 83/150 [00:13<00:12, 5.55it/s]\u001b[A\n",
"Evaluating workflow: 57%|█████▋ | 86/150 [00:13<00:07, 8.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▊ | 88/150 [00:14<00:13, 4.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 59%|█████▉ | 89/150 [00:15<00:13, 4.60it/s]\u001b[A\n",
"Evaluating workflow: 61%|██████▏ | 92/150 [00:15<00:08, 7.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 63%|██████▎ | 94/150 [00:15<00:07, 7.55it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 96/150 [00:15<00:06, 8.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 65%|██████▌ | 98/150 [00:16<00:11, 4.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 69%|██████▊ | 103/150 [00:16<00:06, 7.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 105/150 [00:17<00:06, 7.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 71%|███████▏ | 107/150 [00:17<00:05, 7.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 73%|███████▎ | 109/150 [00:17<00:06, 6.43it/s]\u001b[A\n",
"Evaluating workflow: 73%|███████▎ | 110/150 [00:17<00:05, 6.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 75%|███████▍ | 112/150 [00:17<00:04, 8.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 114/150 [00:18<00:06, 5.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 77%|███████▋ | 115/150 [00:18<00:07, 4.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 117/150 [00:19<00:06, 5.47it/s]\u001b[A\n",
"Evaluating workflow: 79%|███████▊ | 118/150 [00:19<00:05, 5.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 79%|███████▉ | 119/150 [00:19<00:05, 6.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 120/150 [00:19<00:05, 5.50it/s]\u001b[A\n",
"Evaluating workflow: 81%|████████ | 121/150 [00:19<00:05, 5.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 123/150 [00:19<00:03, 7.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 83%|████████▎ | 124/150 [00:20<00:04, 6.18it/s]\u001b[A\n",
"Evaluating workflow: 83%|████████▎ | 125/150 [00:20<00:03, 6.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 126/150 [00:20<00:06, 3.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 85%|████████▌ | 128/150 [00:21<00:04, 5.23it/s]\u001b[A\n",
"Evaluating workflow: 87%|████████▋ | 131/150 [00:21<00:02, 8.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 89%|████████▊ | 133/150 [00:21<00:01, 9.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 135/150 [00:21<00:02, 7.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 91%|█████████▏| 137/150 [00:22<00:02, 6.17it/s]\u001b[A\n",
"Evaluating workflow: 93%|█████████▎| 139/150 [00:22<00:01, 7.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 141/150 [00:22<00:01, 6.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 95%|█████████▍| 142/150 [00:22<00:01, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 96%|█████████▌| 144/150 [00:23<00:00, 7.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 97%|█████████▋| 146/150 [00:23<00:00, 6.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 147/150 [00:24<00:01, 2.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▊| 148/150 [00:25<00:00, 2.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 99%|█████████▉| 149/150 [00:25<00:00, 2.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 150/150 [00:26<00:00, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:32:17.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 20 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.47333333333333333}\u001b[0m\n",
"\u001b[32m2026-01-13 09:32:17.994\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.0, 'em': 0.0, 'acc': 0.58}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 20/20 [57:20<00:00, 172.03s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:32:17.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mReached the maximum number of steps 20. Optimization has finished.\u001b[0m\n",
"\u001b[32m2026-01-13 09:32:17.998\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1204\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./PertQA_textgrad_final.json\u001b[0m\n",
"\u001b[32m2026-01-13 09:32:18.109\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1204\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./PertQA_textgrad_best.json\u001b[0m\n",
"\u001b[32m2026-01-13 09:32:18.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m448\u001b[0m - \u001b[1mRestored the best graph from snapshot with metrics {'f1': 0.0, 'em': 0.0, 'acc': 0.58}\u001b[0m\n",
"\u001b[32m2026-01-13 09:32:18.110\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 1/3000 [00:00<46:27, 1.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 6/3000 [00:01<09:47, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 8/3000 [00:01<08:21, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 10/3000 [00:02<07:08, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 13/3000 [00:03<13:22, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 18/3000 [00:03<06:42, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 24/3000 [00:04<04:54, 10.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 29/3000 [00:04<04:20, 11.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 31/3000 [00:04<05:22, 9.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 33/3000 [00:05<05:28, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 36/3000 [00:05<06:15, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 39/3000 [00:05<05:23, 9.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 41/3000 [00:06<06:02, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 42/3000 [00:06<10:00, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 45/3000 [00:06<07:48, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 49/3000 [00:07<07:03, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 56/3000 [00:07<03:28, 14.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 59/3000 [00:07<03:02, 16.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 62/3000 [00:08<03:29, 14.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 64/3000 [00:08<05:14, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 68/3000 [00:08<04:20, 11.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 70/3000 [00:09<07:09, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▎ | 75/3000 [00:10<07:17, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 77/3000 [00:10<07:29, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 80/3000 [00:11<05:30, 8.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 84/3000 [00:11<05:35, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 86/3000 [00:11<06:03, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 91/3000 [00:12<04:04, 11.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 93/3000 [00:12<07:48, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 96/3000 [00:13<06:48, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 100/3000 [00:13<06:10, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 104/3000 [00:14<05:38, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 106/3000 [00:14<07:58, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:14<08:58, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 112/3000 [00:15<05:38, 8.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 114/3000 [00:15<04:43, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 117/3000 [00:15<04:15, 11.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 119/3000 [00:15<05:31, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:16<05:58, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:16<05:08, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 126/3000 [00:17<08:14, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 130/3000 [00:17<05:51, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 132/3000 [00:17<05:47, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 134/3000 [00:17<06:13, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 135/3000 [00:18<06:32, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 138/3000 [00:18<06:24, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 144/3000 [00:19<04:35, 10.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 146/3000 [00:19<04:40, 10.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 152/3000 [00:19<03:43, 12.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 154/3000 [00:20<04:50, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 158/3000 [00:20<06:35, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 164/3000 [00:21<04:15, 11.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 166/3000 [00:21<04:23, 10.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 168/3000 [00:21<05:15, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 171/3000 [00:22<06:06, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 174/3000 [00:22<05:12, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 177/3000 [00:22<05:10, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 180/3000 [00:23<04:13, 11.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 182/3000 [00:23<05:30, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:23<06:34, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 186/3000 [00:23<05:27, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 188/3000 [00:24<06:06, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 191/3000 [00:24<06:15, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 195/3000 [00:24<04:22, 10.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 197/3000 [00:25<06:49, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 200/3000 [00:25<06:16, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 205/3000 [00:26<04:00, 11.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [00:26<04:38, 10.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 212/3000 [00:27<05:08, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 214/3000 [00:27<06:06, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 216/3000 [00:27<06:52, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 219/3000 [00:28<05:19, 8.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 221/3000 [00:28<05:22, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 224/3000 [00:28<05:58, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 226/3000 [00:29<07:42, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 229/3000 [00:29<06:18, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 231/3000 [00:29<05:18, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 234/3000 [00:29<04:17, 10.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 236/3000 [00:30<07:09, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 241/3000 [00:31<05:55, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 245/3000 [00:31<04:27, 10.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 247/3000 [00:31<04:11, 10.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 249/3000 [00:31<04:18, 10.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 252/3000 [00:32<06:10, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 256/3000 [00:32<05:15, 8.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 258/3000 [00:32<05:44, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 261/3000 [00:33<05:04, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 263/3000 [00:33<06:23, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 267/3000 [00:34<05:50, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 269/3000 [00:34<05:13, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 271/3000 [00:34<04:54, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [00:34<03:11, 14.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 280/3000 [00:35<04:48, 9.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 284/3000 [00:35<05:12, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 286/3000 [00:36<05:31, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 288/3000 [00:36<05:05, 8.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 292/3000 [00:37<06:22, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 294/3000 [00:37<05:20, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 296/3000 [00:37<06:15, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 299/3000 [00:37<05:00, 8.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 301/3000 [00:38<05:08, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 304/3000 [00:38<05:53, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 307/3000 [00:38<05:14, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [00:38<04:20, 10.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 311/3000 [00:39<05:02, 8.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [00:39<04:21, 10.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 317/3000 [00:39<04:49, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 319/3000 [00:40<06:45, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 322/3000 [00:40<06:16, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 325/3000 [00:41<06:39, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 328/3000 [00:41<05:02, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 330/3000 [00:41<04:52, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 333/3000 [00:42<07:31, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 338/3000 [00:42<05:08, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 342/3000 [00:43<05:47, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 347/3000 [00:44<04:45, 9.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 350/3000 [00:44<04:59, 8.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 353/3000 [00:44<05:16, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 355/3000 [00:45<06:07, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 356/3000 [00:45<07:35, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 358/3000 [00:45<07:20, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 360/3000 [00:46<07:50, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 362/3000 [00:46<06:39, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 368/3000 [00:46<03:53, 11.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 370/3000 [00:46<04:30, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 372/3000 [00:47<04:13, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [00:47<05:52, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 377/3000 [00:48<06:44, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 382/3000 [00:48<04:55, 8.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 383/3000 [00:48<04:49, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 386/3000 [00:48<04:42, 9.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 390/3000 [00:49<06:41, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 391/3000 [00:50<07:26, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 396/3000 [00:50<05:03, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 397/3000 [00:50<05:04, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 400/3000 [00:50<04:50, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 402/3000 [00:51<05:11, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 405/3000 [00:51<06:01, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [00:52<04:41, 9.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 412/3000 [00:52<03:21, 12.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 416/3000 [00:52<03:34, 12.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 418/3000 [00:53<07:14, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 422/3000 [00:53<05:55, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 424/3000 [00:53<05:44, 7.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 425/3000 [00:54<06:53, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 428/3000 [00:54<05:55, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 431/3000 [00:55<07:02, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 437/3000 [00:55<04:41, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 440/3000 [00:55<03:42, 11.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 444/3000 [00:56<04:16, 9.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 446/3000 [00:56<04:12, 10.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 448/3000 [00:57<07:14, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 450/3000 [00:57<06:49, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 452/3000 [00:57<06:29, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 453/3000 [00:57<06:38, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 457/3000 [00:58<04:02, 10.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 459/3000 [00:58<04:51, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 461/3000 [00:58<04:32, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 462/3000 [00:58<06:04, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 463/3000 [00:59<06:53, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 466/3000 [00:59<06:44, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 471/3000 [01:00<04:15, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 473/3000 [01:00<03:50, 10.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 475/3000 [01:00<04:55, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 477/3000 [01:00<05:13, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 480/3000 [01:01<04:53, 8.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 482/3000 [01:01<06:19, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 484/3000 [01:01<05:11, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 487/3000 [01:02<05:13, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 489/3000 [01:03<10:03, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 493/3000 [01:03<06:27, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 498/3000 [01:03<04:15, 9.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 500/3000 [01:03<04:24, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 503/3000 [01:04<04:26, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 506/3000 [01:04<06:13, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 509/3000 [01:05<04:47, 8.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 512/3000 [01:05<04:51, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 514/3000 [01:05<04:11, 9.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 516/3000 [01:06<05:33, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 521/3000 [01:06<05:04, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 526/3000 [01:07<03:59, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 531/3000 [01:07<04:20, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [01:08<04:15, 9.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 536/3000 [01:08<04:25, 9.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 538/3000 [01:08<05:14, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 541/3000 [01:09<05:03, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 545/3000 [01:09<05:17, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 547/3000 [01:09<04:21, 9.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 549/3000 [01:10<04:34, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 551/3000 [01:10<05:04, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 553/3000 [01:10<05:41, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 554/3000 [01:10<06:10, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 555/3000 [01:11<06:54, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 562/3000 [01:11<03:33, 11.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 564/3000 [01:11<03:42, 10.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 568/3000 [01:12<04:31, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 571/3000 [01:12<04:14, 9.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 574/3000 [01:13<05:51, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 577/3000 [01:13<04:42, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 579/3000 [01:13<04:34, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [01:13<05:14, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 584/3000 [01:14<04:08, 9.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 586/3000 [01:14<05:52, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 588/3000 [01:14<04:57, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 593/3000 [01:15<03:49, 10.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 595/3000 [01:15<03:56, 10.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 597/3000 [01:15<04:09, 9.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 600/3000 [01:16<05:06, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 601/3000 [01:16<06:06, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 607/3000 [01:16<03:45, 10.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 609/3000 [01:17<05:00, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 613/3000 [01:17<04:42, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 615/3000 [01:18<06:03, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 617/3000 [01:18<07:10, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 623/3000 [01:19<03:38, 10.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 625/3000 [01:19<04:13, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [01:19<03:49, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 629/3000 [01:19<05:36, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 631/3000 [01:20<04:45, 8.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 635/3000 [01:20<04:03, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 642/3000 [01:20<02:43, 14.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 645/3000 [01:21<05:26, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 647/3000 [01:22<05:38, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 649/3000 [01:22<05:21, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 651/3000 [01:22<04:53, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 655/3000 [01:22<04:10, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 659/3000 [01:23<04:21, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 664/3000 [01:24<04:22, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 666/3000 [01:24<04:19, 8.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 672/3000 [01:24<02:54, 13.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 674/3000 [01:24<03:34, 10.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 680/3000 [01:26<05:39, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 684/3000 [01:26<04:25, 8.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 687/3000 [01:26<03:21, 11.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 689/3000 [01:27<08:00, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 691/3000 [01:28<06:56, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 697/3000 [01:28<04:16, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 700/3000 [01:28<03:43, 10.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 702/3000 [01:28<04:22, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 704/3000 [01:29<04:17, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 708/3000 [01:29<05:28, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 711/3000 [01:30<04:43, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 713/3000 [01:30<04:04, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 716/3000 [01:30<05:13, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 717/3000 [01:31<05:48, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 719/3000 [01:31<06:08, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 724/3000 [01:31<04:06, 9.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 726/3000 [01:32<04:14, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 727/3000 [01:32<04:17, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 729/3000 [01:32<04:47, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 738/3000 [01:33<03:42, 10.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 740/3000 [01:33<03:49, 9.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 745/3000 [01:34<04:13, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 747/3000 [01:34<04:46, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 751/3000 [01:35<04:35, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 754/3000 [01:35<03:29, 10.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 756/3000 [01:36<05:42, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 760/3000 [01:36<04:26, 8.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 762/3000 [01:36<03:42, 10.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 767/3000 [01:36<03:17, 11.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 769/3000 [01:37<03:32, 10.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 771/3000 [01:37<04:17, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 773/3000 [01:37<04:11, 8.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 775/3000 [01:37<04:08, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 778/3000 [01:38<04:42, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 780/3000 [01:38<03:50, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 782/3000 [01:38<05:28, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 783/3000 [01:39<05:10, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 788/3000 [01:39<04:03, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 790/3000 [01:39<03:25, 10.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [01:40<04:29, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 794/3000 [01:40<05:20, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 799/3000 [01:40<04:41, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [01:41<03:25, 10.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 807/3000 [01:41<04:43, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 809/3000 [01:42<04:40, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 812/3000 [01:42<03:45, 9.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 817/3000 [01:42<03:53, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 819/3000 [01:43<04:02, 8.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 821/3000 [01:43<05:15, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 823/3000 [01:43<05:05, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 829/3000 [01:44<03:05, 11.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 833/3000 [01:44<02:41, 13.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 835/3000 [01:44<02:33, 14.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 837/3000 [01:45<03:58, 9.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 841/3000 [01:46<06:01, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 842/3000 [01:46<06:08, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 844/3000 [01:46<06:11, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 847/3000 [01:46<05:07, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 850/3000 [01:47<04:45, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 851/3000 [01:47<04:39, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 852/3000 [01:47<05:23, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 854/3000 [01:47<04:49, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 857/3000 [01:48<04:14, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 858/3000 [01:48<04:46, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 859/3000 [01:48<05:37, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 863/3000 [01:49<04:19, 8.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 864/3000 [01:49<04:51, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 869/3000 [01:49<03:26, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 874/3000 [01:50<03:43, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 878/3000 [01:50<03:41, 9.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 880/3000 [01:50<03:38, 9.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [01:51<04:42, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 884/3000 [01:51<06:22, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [01:52<04:52, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 889/3000 [01:52<04:09, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [01:52<05:08, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 895/3000 [01:53<04:23, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 897/3000 [01:53<03:52, 9.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 899/3000 [01:53<03:43, 9.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 901/3000 [01:54<04:38, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 906/3000 [01:54<03:22, 10.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 910/3000 [01:54<03:38, 9.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 912/3000 [01:55<03:21, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 916/3000 [01:55<03:02, 11.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 918/3000 [01:55<05:05, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 921/3000 [01:56<05:28, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 925/3000 [01:56<04:06, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 927/3000 [01:57<04:10, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 931/3000 [01:57<03:42, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 933/3000 [01:57<04:22, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 936/3000 [01:58<04:17, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 939/3000 [01:58<04:20, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 941/3000 [01:58<04:24, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 942/3000 [01:59<05:02, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 944/3000 [01:59<04:36, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 948/3000 [01:59<03:45, 9.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 950/3000 [02:00<04:12, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 952/3000 [02:00<05:12, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 953/3000 [02:00<05:23, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 954/3000 [02:00<06:19, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 959/3000 [02:01<04:04, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 963/3000 [02:01<03:07, 10.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 965/3000 [02:01<03:01, 11.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 967/3000 [02:02<04:20, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 971/3000 [02:03<05:22, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 973/3000 [02:03<04:57, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 978/3000 [02:03<03:09, 10.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 980/3000 [02:03<03:46, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 982/3000 [02:04<05:14, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 986/3000 [02:05<04:50, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 990/3000 [02:05<03:35, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 992/3000 [02:05<03:07, 10.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 994/3000 [02:06<05:21, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 998/3000 [02:06<04:07, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1000/3000 [02:06<03:31, 9.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1002/3000 [02:06<03:38, 9.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1007/3000 [02:07<03:06, 10.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1009/3000 [02:07<05:03, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1011/3000 [02:08<04:50, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1017/3000 [02:08<03:23, 9.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [02:08<03:18, 9.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1022/3000 [02:09<03:19, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1024/3000 [02:09<04:06, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1026/3000 [02:09<05:13, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1030/3000 [02:10<04:32, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1035/3000 [02:10<03:20, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1037/3000 [02:11<03:39, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1039/3000 [02:11<03:59, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1042/3000 [02:11<03:44, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1044/3000 [02:12<03:54, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1046/3000 [02:12<04:58, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1048/3000 [02:12<03:59, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1050/3000 [02:12<03:55, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1052/3000 [02:13<03:44, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1054/3000 [02:13<04:20, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1056/3000 [02:13<04:51, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [02:13<03:12, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1061/3000 [02:14<04:09, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1066/3000 [02:14<03:01, 10.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1068/3000 [02:15<03:49, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1070/3000 [02:15<05:01, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1074/3000 [02:15<04:01, 7.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1076/3000 [02:16<03:38, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1080/3000 [02:16<02:42, 11.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1082/3000 [02:16<02:56, 10.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1084/3000 [02:17<04:13, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1085/3000 [02:17<04:39, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1086/3000 [02:17<05:10, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1094/3000 [02:18<03:21, 9.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1098/3000 [02:18<03:10, 9.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [02:18<03:32, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1103/3000 [02:19<04:30, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1107/3000 [02:19<03:10, 9.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1111/3000 [02:20<03:07, 10.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [02:20<02:40, 11.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1116/3000 [02:21<04:45, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1121/3000 [02:21<03:25, 9.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1123/3000 [02:21<03:20, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1127/3000 [02:22<03:47, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1129/3000 [02:22<05:27, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1133/3000 [02:23<04:13, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1136/3000 [02:23<03:32, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1141/3000 [02:23<03:23, 9.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1143/3000 [02:24<04:01, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1148/3000 [02:24<03:17, 9.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1150/3000 [02:25<03:25, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1154/3000 [02:25<03:07, 9.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1158/3000 [02:26<04:19, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1161/3000 [02:26<04:02, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1164/3000 [02:27<03:48, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1168/3000 [02:27<03:14, 9.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [02:27<03:16, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1172/3000 [02:27<03:30, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1175/3000 [02:28<03:02, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1178/3000 [02:28<03:32, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1180/3000 [02:29<05:13, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1181/3000 [02:29<06:08, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1186/3000 [02:29<03:49, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1188/3000 [02:30<04:10, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1190/3000 [02:30<04:12, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1194/3000 [02:30<02:40, 11.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1198/3000 [02:31<03:24, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1200/3000 [02:31<03:36, 8.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1203/3000 [02:32<03:56, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1206/3000 [02:32<03:52, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1210/3000 [02:33<03:55, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1213/3000 [02:33<03:30, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1214/3000 [02:33<04:00, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1216/3000 [02:33<03:43, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1221/3000 [02:34<03:23, 8.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1223/3000 [02:34<03:27, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1225/3000 [02:34<03:49, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1227/3000 [02:35<03:34, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1230/3000 [02:35<03:41, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1232/3000 [02:35<03:07, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1235/3000 [02:36<03:35, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1237/3000 [02:36<04:35, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1240/3000 [02:36<03:06, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1242/3000 [02:36<03:26, 8.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1244/3000 [02:37<03:17, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1246/3000 [02:37<02:52, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1249/3000 [02:37<03:41, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1251/3000 [02:38<03:42, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1256/3000 [02:38<03:28, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1260/3000 [02:38<02:38, 10.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1264/3000 [02:39<03:10, 9.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1266/3000 [02:39<03:07, 9.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1268/3000 [02:39<03:03, 9.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1270/3000 [02:40<03:44, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1274/3000 [02:40<02:49, 10.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1277/3000 [02:40<02:59, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1279/3000 [02:41<03:12, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1281/3000 [02:41<04:31, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1284/3000 [02:41<03:43, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1287/3000 [02:42<03:57, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1290/3000 [02:42<03:10, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1292/3000 [02:43<03:43, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1294/3000 [02:43<04:34, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1299/3000 [02:43<02:32, 11.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1301/3000 [02:44<03:22, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1303/3000 [02:44<02:57, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1308/3000 [02:45<03:36, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1310/3000 [02:45<04:02, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1312/3000 [02:45<03:40, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1314/3000 [02:45<03:23, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1319/3000 [02:46<02:10, 12.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1321/3000 [02:47<05:06, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1325/3000 [02:47<03:57, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1329/3000 [02:47<03:05, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1334/3000 [02:48<02:34, 10.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1338/3000 [02:48<03:43, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1340/3000 [02:49<03:31, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1342/3000 [02:49<03:15, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1346/3000 [02:50<03:56, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1348/3000 [02:50<04:33, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1349/3000 [02:51<06:27, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1353/3000 [02:51<04:14, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1357/3000 [02:51<02:54, 9.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1359/3000 [02:51<02:55, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [02:52<03:30, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1363/3000 [02:52<03:45, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1365/3000 [02:52<03:40, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1368/3000 [02:53<03:27, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1372/3000 [02:53<02:41, 10.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1374/3000 [02:54<03:43, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1378/3000 [02:54<03:11, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1379/3000 [02:54<03:23, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1384/3000 [02:55<02:50, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1386/3000 [02:55<02:46, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1388/3000 [02:55<02:36, 10.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1394/3000 [02:56<03:06, 8.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1396/3000 [02:56<02:51, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1398/3000 [02:56<03:08, 8.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [02:57<02:52, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [02:57<04:07, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1405/3000 [02:57<04:03, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1406/3000 [02:58<04:27, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1409/3000 [02:58<03:53, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1413/3000 [02:58<02:50, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1416/3000 [02:59<02:55, 9.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1420/3000 [02:59<02:59, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1422/3000 [02:59<02:32, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1424/3000 [03:00<02:43, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1426/3000 [03:00<03:14, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1428/3000 [03:00<03:25, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1430/3000 [03:01<03:34, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1432/3000 [03:01<03:28, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1434/3000 [03:01<03:15, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1436/3000 [03:01<04:15, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1438/3000 [03:02<04:27, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1443/3000 [03:02<02:10, 11.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1447/3000 [03:02<01:48, 14.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1451/3000 [03:03<01:52, 13.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1453/3000 [03:03<04:20, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1455/3000 [03:04<04:26, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [03:04<03:38, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1459/3000 [03:04<03:20, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1463/3000 [03:05<03:38, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1465/3000 [03:05<03:31, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [03:05<03:45, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1468/3000 [03:06<04:09, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1473/3000 [03:06<02:36, 9.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1475/3000 [03:06<02:50, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1476/3000 [03:07<03:39, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1478/3000 [03:07<03:38, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1482/3000 [03:07<03:21, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1483/3000 [03:08<03:20, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1484/3000 [03:08<04:42, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1487/3000 [03:08<03:32, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1489/3000 [03:08<03:25, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1491/3000 [03:09<03:24, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1497/3000 [03:09<02:03, 12.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1499/3000 [03:10<03:00, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1502/3000 [03:10<04:03, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1506/3000 [03:11<03:32, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1510/3000 [03:11<02:25, 10.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1512/3000 [03:11<02:32, 9.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1514/3000 [03:11<02:22, 10.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1516/3000 [03:12<05:07, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1520/3000 [03:13<03:46, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1524/3000 [03:13<02:30, 9.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1528/3000 [03:13<02:12, 11.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1530/3000 [03:14<02:32, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1532/3000 [03:14<03:28, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1534/3000 [03:14<03:34, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1539/3000 [03:15<02:24, 10.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1542/3000 [03:15<02:38, 9.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1544/3000 [03:15<02:59, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1545/3000 [03:16<03:46, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1550/3000 [03:16<03:24, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1552/3000 [03:17<02:55, 8.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1556/3000 [03:17<02:37, 9.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1560/3000 [03:17<01:57, 12.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1564/3000 [03:18<02:05, 11.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1566/3000 [03:18<03:04, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1571/3000 [03:19<02:49, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1574/3000 [03:19<02:45, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1576/3000 [03:19<03:02, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1578/3000 [03:20<03:18, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1579/3000 [03:20<04:19, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1581/3000 [03:20<04:31, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1587/3000 [03:21<02:12, 10.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1591/3000 [03:21<01:43, 13.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1593/3000 [03:21<02:14, 10.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1595/3000 [03:22<02:23, 9.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1597/3000 [03:22<02:33, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1599/3000 [03:22<03:04, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [03:22<02:49, 8.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1603/3000 [03:23<03:06, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1606/3000 [03:23<03:44, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1608/3000 [03:24<03:32, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1609/3000 [03:24<03:35, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1611/3000 [03:24<03:13, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1612/3000 [03:24<03:52, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1615/3000 [03:25<02:52, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1619/3000 [03:25<02:33, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1621/3000 [03:25<02:49, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1623/3000 [03:26<03:48, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1625/3000 [03:26<03:53, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1629/3000 [03:26<02:23, 9.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1631/3000 [03:27<02:45, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1634/3000 [03:27<02:48, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1637/3000 [03:28<03:59, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1639/3000 [03:28<04:05, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1641/3000 [03:29<03:49, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1646/3000 [03:29<02:31, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1648/3000 [03:29<02:18, 9.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1651/3000 [03:30<02:45, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1653/3000 [03:30<02:50, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1655/3000 [03:31<04:20, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1658/3000 [03:31<02:43, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1662/3000 [03:31<02:09, 10.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1665/3000 [03:32<04:04, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1668/3000 [03:32<03:09, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1671/3000 [03:33<02:32, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1673/3000 [03:33<02:37, 8.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1677/3000 [03:34<02:53, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [03:34<02:29, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1682/3000 [03:34<01:57, 11.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1684/3000 [03:34<02:15, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1688/3000 [03:35<02:15, 9.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1690/3000 [03:35<02:05, 10.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1694/3000 [03:36<03:07, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1696/3000 [03:36<03:09, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1701/3000 [03:36<02:30, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1703/3000 [03:37<03:18, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [03:37<03:40, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1708/3000 [03:37<02:40, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1710/3000 [03:38<02:12, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1714/3000 [03:38<02:10, 9.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1716/3000 [03:38<02:31, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1717/3000 [03:39<03:04, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1720/3000 [03:39<02:55, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1724/3000 [03:39<01:53, 11.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1726/3000 [03:40<03:03, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1729/3000 [03:40<03:13, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1732/3000 [03:40<02:32, 8.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1737/3000 [03:41<02:24, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1739/3000 [03:41<02:23, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [03:42<02:37, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1743/3000 [03:42<03:00, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1745/3000 [03:42<03:10, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1749/3000 [03:43<02:46, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1752/3000 [03:43<02:45, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1753/3000 [03:43<02:38, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1754/3000 [03:44<03:42, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1756/3000 [03:44<03:25, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1758/3000 [03:44<03:17, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1763/3000 [03:45<02:25, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [03:45<03:12, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1766/3000 [03:45<03:16, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1769/3000 [03:46<02:34, 7.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1772/3000 [03:46<02:16, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1777/3000 [03:46<02:05, 9.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1779/3000 [03:47<02:21, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1781/3000 [03:47<02:58, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1784/3000 [03:48<02:54, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1785/3000 [03:48<03:44, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1788/3000 [03:49<03:25, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1794/3000 [03:49<02:26, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1796/3000 [03:49<02:07, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1799/3000 [03:50<03:16, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1801/3000 [03:50<02:40, 7.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1805/3000 [03:51<02:15, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1807/3000 [03:51<02:05, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1811/3000 [03:51<01:49, 10.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [03:51<01:35, 12.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1815/3000 [03:52<02:39, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1821/3000 [03:52<02:12, 8.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1824/3000 [03:53<01:49, 10.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1826/3000 [03:53<03:00, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1828/3000 [03:54<02:49, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [03:54<03:08, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1831/3000 [03:54<03:04, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1837/3000 [03:54<01:55, 10.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1841/3000 [03:55<01:29, 12.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1843/3000 [03:55<01:54, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1847/3000 [03:56<02:11, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1850/3000 [03:56<02:51, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1852/3000 [03:57<03:00, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1854/3000 [03:57<02:51, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1857/3000 [03:57<02:43, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1863/3000 [03:58<01:40, 11.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1865/3000 [03:58<01:52, 10.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1867/3000 [03:58<01:51, 10.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1869/3000 [03:58<02:02, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1871/3000 [03:59<02:26, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1873/3000 [03:59<02:20, 8.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1876/3000 [03:59<02:35, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1877/3000 [04:00<02:52, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1881/3000 [04:00<02:05, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1883/3000 [04:00<02:42, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1886/3000 [04:01<02:31, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1888/3000 [04:01<02:34, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1891/3000 [04:01<02:00, 9.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1896/3000 [04:02<01:50, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1900/3000 [04:02<01:40, 10.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1902/3000 [04:03<02:36, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1904/3000 [04:03<03:03, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1906/3000 [04:03<02:53, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1908/3000 [04:04<04:27, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1911/3000 [04:05<03:02, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1912/3000 [04:05<03:06, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1915/3000 [04:06<03:08, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1919/3000 [04:06<02:07, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1921/3000 [04:06<02:11, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1924/3000 [04:07<02:30, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1926/3000 [04:07<02:07, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1929/3000 [04:07<02:37, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1932/3000 [04:07<01:47, 9.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1935/3000 [04:08<02:06, 8.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1937/3000 [04:08<02:38, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1938/3000 [04:09<02:49, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1940/3000 [04:09<03:36, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1942/3000 [04:09<03:04, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1947/3000 [04:10<01:49, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1952/3000 [04:11<02:45, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1954/3000 [04:11<03:05, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1957/3000 [04:12<02:30, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1959/3000 [04:12<02:11, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1964/3000 [04:12<01:26, 11.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1966/3000 [04:13<02:46, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1968/3000 [04:13<03:25, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1970/3000 [04:14<03:12, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1971/3000 [04:14<02:54, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1976/3000 [04:14<02:01, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1979/3000 [04:14<01:26, 11.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1983/3000 [04:15<01:32, 11.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1985/3000 [04:15<02:38, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1987/3000 [04:16<03:23, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1989/3000 [04:16<02:53, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1994/3000 [04:17<02:02, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1996/3000 [04:17<02:04, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1999/3000 [04:18<02:33, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2001/3000 [04:18<02:04, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2003/3000 [04:18<02:03, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2005/3000 [04:18<02:24, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2010/3000 [04:19<01:45, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2012/3000 [04:19<01:30, 10.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2014/3000 [04:19<02:02, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2016/3000 [04:20<02:49, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2019/3000 [04:20<02:09, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2021/3000 [04:20<02:05, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [04:20<02:24, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2024/3000 [04:21<02:22, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2027/3000 [04:21<02:43, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2030/3000 [04:22<02:03, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2033/3000 [04:22<01:58, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [04:22<01:33, 10.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [04:23<01:59, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2041/3000 [04:23<01:52, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2043/3000 [04:23<02:00, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2047/3000 [04:24<02:14, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2051/3000 [04:24<01:35, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2053/3000 [04:25<02:18, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2055/3000 [04:25<02:10, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2059/3000 [04:25<02:04, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2061/3000 [04:26<01:49, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2064/3000 [04:26<02:04, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2068/3000 [04:27<02:00, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2070/3000 [04:27<02:25, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2075/3000 [04:27<01:21, 11.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2077/3000 [04:28<01:46, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2079/3000 [04:28<01:33, 9.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2083/3000 [04:28<01:48, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2086/3000 [04:29<01:54, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2088/3000 [04:29<02:06, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2092/3000 [04:30<02:02, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2094/3000 [04:30<03:04, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2098/3000 [04:31<02:20, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2102/3000 [04:31<01:24, 10.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2104/3000 [04:32<02:11, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2109/3000 [04:32<01:27, 10.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2113/3000 [04:32<01:43, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2115/3000 [04:33<01:33, 9.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2118/3000 [04:33<01:15, 11.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2120/3000 [04:33<01:59, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2123/3000 [04:34<01:43, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2125/3000 [04:34<02:27, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2128/3000 [04:35<02:01, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2130/3000 [04:35<01:47, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2133/3000 [04:35<02:13, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2136/3000 [04:36<01:54, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2139/3000 [04:36<01:50, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2141/3000 [04:36<01:57, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2143/3000 [04:37<02:15, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2147/3000 [04:37<01:33, 9.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2150/3000 [04:37<01:25, 9.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2152/3000 [04:38<01:24, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2154/3000 [04:38<02:01, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2158/3000 [04:38<01:34, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2161/3000 [04:39<01:53, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2163/3000 [04:39<01:36, 8.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2165/3000 [04:39<01:38, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2166/3000 [04:39<01:39, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2169/3000 [04:40<02:06, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▎ | 2175/3000 [04:40<01:11, 11.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2177/3000 [04:41<01:18, 10.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2179/3000 [04:41<01:24, 9.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2181/3000 [04:41<01:40, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2185/3000 [04:42<01:31, 8.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2186/3000 [04:42<01:38, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2190/3000 [04:42<01:48, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2192/3000 [04:43<01:45, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2195/3000 [04:43<02:05, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2197/3000 [04:44<02:07, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2201/3000 [04:44<01:35, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2203/3000 [04:44<01:20, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [04:44<01:37, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2209/3000 [04:45<01:28, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2212/3000 [04:46<02:22, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2215/3000 [04:47<02:52, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2219/3000 [04:47<01:41, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2221/3000 [04:47<01:28, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [04:47<01:30, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2226/3000 [04:48<02:01, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2230/3000 [04:48<01:25, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2232/3000 [04:49<02:11, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2236/3000 [04:49<01:45, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2237/3000 [04:50<02:20, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2240/3000 [04:50<01:56, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2242/3000 [04:50<01:30, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2244/3000 [04:50<01:27, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2246/3000 [04:51<01:30, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2251/3000 [04:51<01:06, 11.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2253/3000 [04:51<01:14, 9.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2255/3000 [04:52<01:57, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2258/3000 [04:52<01:25, 8.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2262/3000 [04:52<01:10, 10.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2264/3000 [04:53<01:24, 8.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2266/3000 [04:53<01:20, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2268/3000 [04:53<01:48, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2271/3000 [04:54<01:50, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2273/3000 [04:54<01:38, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2275/3000 [04:54<01:49, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2278/3000 [04:55<01:22, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2282/3000 [04:55<01:08, 10.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [04:55<00:55, 12.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2287/3000 [04:56<01:35, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2290/3000 [04:56<01:24, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2294/3000 [04:56<01:14, 9.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [04:57<01:18, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2300/3000 [04:57<01:25, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2303/3000 [04:58<01:39, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2306/3000 [04:58<01:34, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2307/3000 [04:58<01:29, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2311/3000 [04:59<01:39, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2312/3000 [04:59<01:57, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2315/3000 [05:00<01:27, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2316/3000 [05:00<01:42, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2317/3000 [05:00<01:58, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2321/3000 [05:00<01:29, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2326/3000 [05:01<01:07, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2329/3000 [05:01<00:56, 11.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [05:02<01:42, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2333/3000 [05:02<01:56, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2337/3000 [05:02<01:15, 8.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2339/3000 [05:03<01:45, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2340/3000 [05:03<01:52, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2342/3000 [05:03<01:48, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2347/3000 [05:04<01:19, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2350/3000 [05:04<01:00, 10.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2352/3000 [05:04<01:09, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2354/3000 [05:05<01:23, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2357/3000 [05:06<01:49, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2358/3000 [05:06<01:54, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2364/3000 [05:06<00:58, 10.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2366/3000 [05:07<01:24, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2368/3000 [05:07<01:21, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2370/3000 [05:07<01:32, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2373/3000 [05:08<01:17, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2376/3000 [05:08<00:56, 11.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2378/3000 [05:08<01:01, 10.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2383/3000 [05:08<00:52, 11.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2385/3000 [05:09<01:46, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2389/3000 [05:09<01:14, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2394/3000 [05:10<01:08, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2397/3000 [05:10<00:55, 10.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2401/3000 [05:11<01:24, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2403/3000 [05:12<01:37, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2405/3000 [05:12<01:34, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [05:12<01:14, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2410/3000 [05:12<01:16, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2413/3000 [05:12<01:03, 9.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2416/3000 [05:13<01:17, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [05:13<01:05, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2420/3000 [05:13<01:03, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [05:14<01:19, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2425/3000 [05:14<01:23, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2427/3000 [05:14<01:05, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2430/3000 [05:15<01:33, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2431/3000 [05:15<01:27, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2436/3000 [05:16<01:14, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2439/3000 [05:16<01:12, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2443/3000 [05:17<01:06, 8.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2444/3000 [05:17<01:12, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2445/3000 [05:17<01:20, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2448/3000 [05:17<01:13, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2450/3000 [05:17<00:56, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2454/3000 [05:18<00:54, 9.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2458/3000 [05:18<01:06, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2460/3000 [05:19<01:21, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2461/3000 [05:19<01:48, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2468/3000 [05:20<01:08, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2472/3000 [05:21<01:05, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▎ | 2475/3000 [05:21<01:17, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2480/3000 [05:22<01:05, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2484/3000 [05:22<01:00, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2486/3000 [05:22<00:53, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2490/3000 [05:23<00:58, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2492/3000 [05:23<01:04, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2495/3000 [05:24<00:53, 9.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2497/3000 [05:24<01:07, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2501/3000 [05:24<01:01, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2503/3000 [05:25<00:58, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2505/3000 [05:25<01:05, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2508/3000 [05:25<00:50, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2510/3000 [05:25<00:56, 8.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2512/3000 [05:26<01:22, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2517/3000 [05:26<00:55, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2519/3000 [05:27<00:58, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2523/3000 [05:27<00:39, 12.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [05:27<00:43, 10.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2527/3000 [05:27<00:45, 10.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2529/3000 [05:28<01:12, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2532/3000 [05:28<01:06, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2534/3000 [05:28<00:53, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2539/3000 [05:29<00:44, 10.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2543/3000 [05:30<01:08, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2549/3000 [05:30<00:38, 11.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2552/3000 [05:30<00:32, 13.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2554/3000 [05:30<00:36, 12.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2556/3000 [05:31<00:40, 11.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2558/3000 [05:31<00:53, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2560/3000 [05:31<00:56, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2562/3000 [05:32<00:52, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2567/3000 [05:32<00:43, 9.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [05:32<00:37, 11.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2572/3000 [05:32<00:44, 9.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2577/3000 [05:34<01:00, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2580/3000 [05:34<00:49, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2582/3000 [05:34<00:48, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2584/3000 [05:34<00:57, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2585/3000 [05:35<00:58, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2589/3000 [05:35<00:49, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2591/3000 [05:35<00:50, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2594/3000 [05:35<00:39, 10.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2596/3000 [05:36<00:53, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2598/3000 [05:36<00:46, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2601/3000 [05:36<00:50, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2603/3000 [05:37<00:44, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2607/3000 [05:37<00:46, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2609/3000 [05:37<00:41, 9.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2613/3000 [05:38<00:42, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2615/3000 [05:38<00:39, 9.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2617/3000 [05:38<00:43, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2620/3000 [05:39<00:51, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2624/3000 [05:39<00:43, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2626/3000 [05:40<00:47, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2628/3000 [05:40<00:51, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2629/3000 [05:40<00:52, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2631/3000 [05:40<00:56, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2637/3000 [05:41<00:32, 11.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2639/3000 [05:41<00:48, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2642/3000 [05:42<00:42, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2644/3000 [05:42<00:44, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2645/3000 [05:42<00:52, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2648/3000 [05:43<00:48, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2650/3000 [05:43<00:51, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2651/3000 [05:43<01:02, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2656/3000 [05:44<00:37, 9.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [05:44<00:31, 10.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [05:44<00:28, 11.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2664/3000 [05:44<00:32, 10.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2666/3000 [05:45<00:46, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2668/3000 [05:45<00:56, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2671/3000 [05:45<00:36, 8.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2673/3000 [05:46<00:34, 9.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2675/3000 [05:46<00:37, 8.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2677/3000 [05:46<00:49, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2681/3000 [05:47<00:46, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2683/3000 [05:47<00:42, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2686/3000 [05:47<00:38, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2690/3000 [05:48<00:33, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2698/3000 [05:49<00:46, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2702/3000 [05:49<00:30, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2705/3000 [05:50<00:36, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2707/3000 [05:50<00:31, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2709/3000 [05:50<00:33, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2711/3000 [05:51<00:40, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2714/3000 [05:51<00:41, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2717/3000 [05:52<00:41, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2721/3000 [05:52<00:27, 10.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2726/3000 [05:53<00:39, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2728/3000 [05:53<00:47, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2730/3000 [05:54<00:39, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2733/3000 [05:54<00:29, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [05:54<00:31, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2739/3000 [05:55<00:41, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2740/3000 [05:55<00:40, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2742/3000 [05:55<00:35, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2749/3000 [05:56<00:23, 10.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2751/3000 [05:56<00:33, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2753/3000 [05:56<00:29, 8.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2755/3000 [05:57<00:40, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2757/3000 [05:57<00:36, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2760/3000 [05:58<00:31, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2763/3000 [05:58<00:31, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2768/3000 [05:58<00:22, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2770/3000 [05:59<00:26, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2772/3000 [05:59<00:34, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2776/3000 [06:00<00:28, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2779/3000 [06:00<00:20, 10.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [06:00<00:23, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [06:00<00:20, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2786/3000 [06:01<00:31, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2788/3000 [06:01<00:33, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2790/3000 [06:01<00:30, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2794/3000 [06:02<00:26, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2796/3000 [06:02<00:22, 9.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2799/3000 [06:02<00:20, 9.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2801/3000 [06:03<00:22, 8.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2804/3000 [06:03<00:23, 8.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2805/3000 [06:03<00:26, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2810/3000 [06:04<00:21, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2812/3000 [06:04<00:22, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2815/3000 [06:04<00:17, 10.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2817/3000 [06:05<00:24, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2822/3000 [06:05<00:19, 9.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2826/3000 [06:06<00:23, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2828/3000 [06:06<00:22, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2830/3000 [06:06<00:18, 9.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2832/3000 [06:07<00:18, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [06:07<00:21, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2837/3000 [06:07<00:17, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2841/3000 [06:07<00:15, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2845/3000 [06:08<00:14, 10.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2847/3000 [06:09<00:26, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2851/3000 [06:09<00:20, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2855/3000 [06:10<00:17, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2861/3000 [06:10<00:13, 10.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2863/3000 [06:10<00:17, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2865/3000 [06:11<00:15, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2869/3000 [06:11<00:14, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2872/3000 [06:11<00:12, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2874/3000 [06:12<00:15, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2875/3000 [06:12<00:18, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2876/3000 [06:12<00:19, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2880/3000 [06:13<00:18, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2881/3000 [06:13<00:17, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2885/3000 [06:13<00:13, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2889/3000 [06:14<00:10, 10.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2891/3000 [06:14<00:10, 9.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2896/3000 [06:14<00:10, 9.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2900/3000 [06:15<00:12, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2904/3000 [06:15<00:09, 10.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2906/3000 [06:16<00:14, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2911/3000 [06:17<00:12, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2915/3000 [06:17<00:09, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2918/3000 [06:17<00:07, 11.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2920/3000 [06:18<00:09, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2925/3000 [06:18<00:06, 11.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2927/3000 [06:18<00:08, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2929/3000 [06:19<00:10, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2931/3000 [06:19<00:08, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2933/3000 [06:19<00:10, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2934/3000 [06:20<00:12, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2938/3000 [06:20<00:08, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2940/3000 [06:20<00:07, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2944/3000 [06:21<00:05, 9.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2947/3000 [06:21<00:06, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2949/3000 [06:22<00:06, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2952/3000 [06:22<00:06, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2954/3000 [06:22<00:04, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2959/3000 [06:22<00:03, 11.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2961/3000 [06:23<00:03, 12.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2963/3000 [06:23<00:04, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2966/3000 [06:24<00:04, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2967/3000 [06:24<00:04, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2973/3000 [06:24<00:02, 10.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2977/3000 [06:25<00:03, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2980/3000 [06:25<00:02, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2984/3000 [06:26<00:01, 11.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2986/3000 [06:26<00:01, 10.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2990/3000 [06:27<00:01, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2992/3000 [06:27<00:01, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2993/3000 [06:27<00:01, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2995/3000 [06:27<00:00, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2997/3000 [06:28<00:00, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2998/3000 [06:28<00:00, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [06:29<00:00, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:38:47.314\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.0, 'em': 0.0, 'acc': 0.49833333333333335}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")\n",
"\n",
"logger.info(\"Optimizing workflow...\")\n",
"textgrad_optimizer.optimize(benchmark, seed=8)\n",
"textgrad_optimizer.restore_best_graph()\n",
"\n",
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4b6f274d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.0, 'em': 0.0, 'acc': 0.49833333333333335}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "8ffcb068",
"metadata": {},
"outputs": [],
"source": [
"# Single-task QA workflow spec (original note: generated with 4o).\n",
"qa_workflow = {\n",
"    \"goal\": \"Provide a direct answer to the question based on the context, without including explanations or reasoning.\",\n",
"    \"tasks\": [\n",
"        {\n",
"            \"name\": \"answer_generate\",\n",
"            \"description\": \"Generate a direct answer to the question based on the context.\",\n",
"            \"inputs\": [\n",
"                {\n",
"                    \"name\": \"problem\",\n",
"                    \"type\": \"str\",\n",
"                    \"required\": True,\n",
"                    \"description\": \"The question to answer directly.\",\n",
"                },\n",
"            ],\n",
"            \"outputs\": [\n",
"                {\n",
"                    \"name\": \"answer\",\n",
"                    \"type\": \"str\",\n",
"                    \"required\": True,\n",
"                    \"description\": \"The direct answer to the question.\",\n",
"                },\n",
"            ],\n",
"            \"prompt_template\": StringTemplate(\n",
"                instruction=\"Provide a concise and direct answer to the question.\"\n",
"            ),\n",
"            \"parse_mode\": \"xml\",\n",
"        },\n",
"    ],\n",
"}\n",
"\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
"    \"\"\"Turn one benchmark example into the workflow's input dict.\n",
"\n",
"    Args:\n",
"        example: Record to convert; only its \"question\" entry is read.\n",
"\n",
"    Returns:\n",
"        A dict whose single \"problem\" key holds the question wrapped as\n",
"        \"Question: <question>\" followed by a blank line and \"Answer:\".\n",
"    \"\"\"\n",
"    question = example[\"question\"]\n",
"    return {\"problem\": \"Question: {}\\n\\nAnswer:\".format(question)}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "dfaf5515",
"metadata": {},
"outputs": [],
"source": [
"# Build the sequential graph from the dict spec, then register its agents\n",
"# with a fresh manager using the executor LLM's config.\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(qa_workflow)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"# Evaluator handed to the optimizer below; collate_func converts each\n",
"# benchmark example into the workflow's input dict.\n",
"evaluator = Evaluator(\n",
"    llm=executor_llm,\n",
"    agent_manager=agent_manager,\n",
"    collate_func=collate_func,\n",
"    num_workers=20,\n",
"    verbose=True,\n",
")\n",
"\n",
"# Keyword arguments are grouped by parameter name for readability only;\n",
"# see the TextGradOptimizer documentation for their exact semantics.\n",
"textgrad_optimizer = TextGradOptimizer(\n",
"    # What is optimized, and how it is scored\n",
"    graph=workflow_graph,\n",
"    optimize_mode=\"all\",\n",
"    evaluator=evaluator,\n",
"    constraints=[],\n",
"    # Models\n",
"    executor_llm=executor_llm,\n",
"    optimizer_llm=optimizer_llm,\n",
"    # Step and evaluation schedule\n",
"    batch_size=3,\n",
"    max_steps=20,\n",
"    eval_every_n_steps=1,\n",
"    eval_rounds=1,\n",
"    # Saving and rollback\n",
"    save_interval=None,\n",
"    save_path=\"./\",\n",
"    rollback=True,\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "2d25086a",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"3000"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._test_data)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "09118f35",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:49:33.853\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 3/3000 [00:00<12:28, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 9/3000 [00:01<04:00, 12.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 18/3000 [00:01<02:43, 18.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 23/3000 [00:01<03:19, 14.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 28/3000 [00:02<02:39, 18.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 31/3000 [00:02<02:49, 17.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 41/3000 [00:02<02:09, 22.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 44/3000 [00:02<02:54, 16.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 51/3000 [00:03<02:15, 21.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 54/3000 [00:03<02:42, 18.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 57/3000 [00:03<02:41, 18.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 66/3000 [00:04<02:18, 21.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 72/3000 [00:04<02:22, 20.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▎ | 75/3000 [00:04<02:50, 17.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 82/3000 [00:04<02:11, 22.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 85/3000 [00:04<02:15, 21.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 88/3000 [00:05<02:33, 18.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 97/3000 [00:05<02:41, 17.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 100/3000 [00:05<02:50, 16.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:06<02:21, 20.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 112/3000 [00:06<01:58, 24.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 120/3000 [00:06<02:02, 23.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 123/3000 [00:07<02:43, 17.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 129/3000 [00:07<02:04, 23.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 132/3000 [00:07<02:19, 20.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 138/3000 [00:07<02:15, 21.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 141/3000 [00:07<02:37, 18.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 147/3000 [00:08<02:06, 22.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 150/3000 [00:08<02:15, 21.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 157/3000 [00:08<02:25, 19.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:08<02:19, 20.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 166/3000 [00:09<02:29, 19.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 170/3000 [00:09<02:02, 23.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 176/3000 [00:09<02:26, 19.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 183/3000 [00:09<01:58, 23.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 189/3000 [00:10<02:33, 18.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 196/3000 [00:10<02:04, 22.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 200/3000 [00:10<01:56, 24.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 203/3000 [00:11<02:46, 16.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [00:11<02:22, 19.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 214/3000 [00:11<02:10, 21.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 218/3000 [00:11<01:53, 24.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 221/3000 [00:11<02:18, 20.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 227/3000 [00:12<02:36, 17.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 229/3000 [00:12<02:49, 16.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 234/3000 [00:12<02:15, 20.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 240/3000 [00:12<02:03, 22.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 246/3000 [00:13<02:19, 19.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 252/3000 [00:13<02:10, 20.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 257/3000 [00:13<02:07, 21.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 260/3000 [00:13<02:12, 20.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 263/3000 [00:13<02:23, 19.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 266/3000 [00:14<02:23, 19.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 276/3000 [00:14<01:51, 24.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 279/3000 [00:14<02:16, 19.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 282/3000 [00:14<02:47, 16.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 286/3000 [00:15<02:30, 18.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 295/3000 [00:15<01:45, 25.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 303/3000 [00:15<02:08, 21.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [00:16<02:29, 18.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 313/3000 [00:16<02:09, 20.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 316/3000 [00:16<02:04, 21.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 323/3000 [00:16<02:03, 21.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 327/3000 [00:17<02:17, 19.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 330/3000 [00:17<02:11, 20.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 336/3000 [00:17<02:18, 19.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [00:17<02:29, 17.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 342/3000 [00:17<02:21, 18.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 350/3000 [00:18<02:00, 21.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 358/3000 [00:18<02:11, 20.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 365/3000 [00:18<01:58, 22.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 369/3000 [00:19<01:51, 23.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 372/3000 [00:19<02:19, 18.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 376/3000 [00:19<03:21, 13.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [00:20<03:46, 11.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 396/3000 [00:20<02:23, 18.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 399/3000 [00:21<02:14, 19.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 404/3000 [00:21<02:46, 15.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 411/3000 [00:21<02:08, 20.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 414/3000 [00:21<02:05, 20.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 421/3000 [00:22<01:57, 22.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 427/3000 [00:22<02:31, 16.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 433/3000 [00:23<02:18, 18.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 438/3000 [00:23<01:55, 22.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 441/3000 [00:23<01:58, 21.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 447/3000 [00:23<02:02, 20.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 450/3000 [00:23<02:07, 19.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 455/3000 [00:24<02:32, 16.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 462/3000 [00:24<01:55, 21.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 465/3000 [00:24<02:04, 20.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 471/3000 [00:24<02:09, 19.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 474/3000 [00:25<02:16, 18.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 479/3000 [00:25<02:32, 16.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [00:25<01:55, 21.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 488/3000 [00:25<02:02, 20.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 491/3000 [00:25<01:56, 21.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 498/3000 [00:26<01:54, 21.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 505/3000 [00:26<01:59, 20.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 508/3000 [00:26<01:56, 21.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 514/3000 [00:27<02:08, 19.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 517/3000 [00:27<02:25, 17.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 524/3000 [00:27<02:06, 19.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 528/3000 [00:27<01:45, 23.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 531/3000 [00:28<02:27, 16.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [00:28<02:28, 16.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 540/3000 [00:28<02:18, 17.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 548/3000 [00:28<01:59, 20.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 551/3000 [00:29<02:05, 19.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 558/3000 [00:29<01:52, 21.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [00:29<02:16, 17.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 566/3000 [00:29<02:23, 16.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 572/3000 [00:30<02:01, 20.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 576/3000 [00:30<01:50, 21.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 579/3000 [00:30<02:04, 19.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 582/3000 [00:30<02:05, 19.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 588/3000 [00:30<01:57, 20.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 595/3000 [00:31<02:16, 17.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 598/3000 [00:31<02:17, 17.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 606/3000 [00:31<01:52, 21.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 613/3000 [00:32<01:54, 20.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 619/3000 [00:32<02:09, 18.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 625/3000 [00:32<01:39, 23.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 628/3000 [00:33<02:17, 17.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 634/3000 [00:33<02:00, 19.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 640/3000 [00:33<02:02, 19.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 643/3000 [00:33<02:05, 18.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [00:34<02:16, 17.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 653/3000 [00:34<01:49, 21.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 659/3000 [00:34<01:48, 21.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 662/3000 [00:35<02:41, 14.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 669/3000 [00:35<01:53, 20.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▎ | 675/3000 [00:35<01:56, 20.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 684/3000 [00:36<02:04, 18.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 687/3000 [00:36<02:00, 19.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 693/3000 [00:36<02:11, 17.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 698/3000 [00:36<01:41, 22.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 702/3000 [00:36<01:44, 21.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 708/3000 [00:37<01:56, 19.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 714/3000 [00:37<02:02, 18.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 718/3000 [00:37<01:54, 19.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 721/3000 [00:38<01:59, 19.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 727/3000 [00:38<02:04, 18.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 731/3000 [00:38<01:41, 22.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 734/3000 [00:38<01:50, 20.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 739/3000 [00:39<02:21, 15.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 747/3000 [00:39<01:39, 22.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 754/3000 [00:39<01:53, 19.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 757/3000 [00:39<01:55, 19.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 762/3000 [00:40<02:17, 16.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 769/3000 [00:40<01:44, 21.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 776/3000 [00:40<01:43, 21.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 779/3000 [00:41<02:00, 18.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 781/3000 [00:41<02:23, 15.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 788/3000 [00:41<01:48, 20.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 791/3000 [00:41<01:47, 20.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 796/3000 [00:42<02:09, 17.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [00:42<01:45, 20.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [00:42<01:53, 19.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 809/3000 [00:42<01:57, 18.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 812/3000 [00:42<01:50, 19.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 819/3000 [00:43<01:50, 19.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 822/3000 [00:43<01:59, 18.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 830/3000 [00:43<01:47, 20.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [00:43<01:51, 19.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 835/3000 [00:44<01:57, 18.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 840/3000 [00:44<02:01, 17.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 847/3000 [00:44<01:49, 19.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 851/3000 [00:44<01:34, 22.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 854/3000 [00:45<01:42, 20.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 859/3000 [00:45<02:09, 16.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 867/3000 [00:45<01:41, 20.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 870/3000 [00:45<01:46, 20.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 874/3000 [00:46<01:33, 22.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 877/3000 [00:46<01:51, 19.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 886/3000 [00:46<01:41, 20.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [00:46<01:45, 19.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 896/3000 [00:47<02:16, 15.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 902/3000 [00:47<01:44, 20.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 905/3000 [00:47<01:39, 20.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 911/3000 [00:48<01:46, 19.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 918/3000 [00:48<01:52, 18.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 922/3000 [00:48<01:41, 20.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 925/3000 [00:48<01:48, 19.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 931/3000 [00:49<01:55, 17.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 936/3000 [00:49<01:46, 19.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 943/3000 [00:49<01:23, 24.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 946/3000 [00:49<01:42, 20.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 952/3000 [00:50<01:42, 19.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 957/3000 [00:50<01:59, 17.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 962/3000 [00:50<01:28, 23.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 965/3000 [00:50<01:48, 18.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [00:51<02:05, 16.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▎ | 975/3000 [00:51<01:32, 21.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 982/3000 [00:51<01:45, 19.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 985/3000 [00:52<01:42, 19.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 991/3000 [00:52<01:48, 18.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 995/3000 [00:52<01:41, 19.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 998/3000 [00:52<01:45, 18.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1001/3000 [00:52<01:54, 17.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1006/3000 [00:53<01:53, 17.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1012/3000 [00:53<01:43, 19.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1017/3000 [00:53<01:27, 22.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1023/3000 [00:54<01:33, 21.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1029/3000 [00:54<01:58, 16.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1033/3000 [00:54<01:37, 20.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1037/3000 [00:54<01:24, 23.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1040/3000 [00:55<01:46, 18.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1046/3000 [00:55<01:49, 17.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1051/3000 [00:55<01:31, 21.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1056/3000 [00:55<01:28, 21.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [00:56<01:48, 17.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1067/3000 [00:56<01:37, 19.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1070/3000 [00:56<01:31, 21.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1073/3000 [00:56<01:37, 19.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1079/3000 [00:57<02:02, 15.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1084/3000 [00:57<01:48, 17.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1090/3000 [00:57<01:17, 24.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1093/3000 [00:57<01:32, 20.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1096/3000 [00:58<02:01, 15.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1104/3000 [00:58<01:28, 21.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1110/3000 [00:58<01:19, 23.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [00:58<01:41, 18.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1116/3000 [00:59<02:00, 15.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1125/3000 [00:59<01:23, 22.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1131/3000 [00:59<01:18, 23.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1137/3000 [01:00<01:43, 17.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1140/3000 [01:00<01:38, 18.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1149/3000 [01:00<01:26, 21.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1154/3000 [01:01<01:47, 17.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1158/3000 [01:01<01:47, 17.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1162/3000 [01:01<01:28, 20.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1166/3000 [01:01<01:30, 20.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1172/3000 [01:02<01:38, 18.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1179/3000 [01:02<01:31, 19.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1182/3000 [01:02<01:36, 18.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1187/3000 [01:02<01:45, 17.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1190/3000 [01:03<01:40, 18.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1195/3000 [01:03<01:52, 16.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1201/3000 [01:03<01:27, 20.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1207/3000 [01:03<01:25, 21.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1210/3000 [01:04<01:31, 19.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1215/3000 [01:04<01:58, 15.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1220/3000 [01:04<01:24, 21.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1223/3000 [01:04<01:39, 17.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1228/3000 [01:05<02:03, 14.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1233/3000 [01:05<01:53, 15.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1241/3000 [01:05<01:15, 23.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1244/3000 [01:06<02:20, 12.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1250/3000 [01:06<01:51, 15.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1254/3000 [01:06<01:36, 18.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1257/3000 [01:06<01:38, 17.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1262/3000 [01:07<01:53, 15.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1267/3000 [01:07<01:38, 17.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1272/3000 [01:07<01:27, 19.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▎ | 1275/3000 [01:07<01:21, 21.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1280/3000 [01:08<01:48, 15.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1282/3000 [01:08<01:42, 16.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1284/3000 [01:08<02:06, 13.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1289/3000 [01:08<01:50, 15.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1296/3000 [01:09<01:26, 19.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1301/3000 [01:09<01:47, 15.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1308/3000 [01:10<01:39, 16.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1311/3000 [01:10<01:37, 17.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1316/3000 [01:10<01:40, 16.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1321/3000 [01:10<01:25, 19.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1326/3000 [01:11<01:36, 17.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1332/3000 [01:11<01:24, 19.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1336/3000 [01:11<01:21, 20.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1340/3000 [01:11<01:17, 21.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1343/3000 [01:11<01:27, 19.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1346/3000 [01:12<01:44, 15.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1356/3000 [01:12<01:19, 20.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1360/3000 [01:12<01:10, 23.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1363/3000 [01:13<01:24, 19.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1370/3000 [01:13<01:19, 20.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1373/3000 [01:13<01:15, 21.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1376/3000 [01:13<01:24, 19.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1384/3000 [01:14<01:21, 19.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1387/3000 [01:14<01:41, 15.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1391/3000 [01:14<01:25, 18.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1398/3000 [01:15<01:30, 17.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [01:15<01:22, 19.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1407/3000 [01:15<01:27, 18.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1410/3000 [01:15<01:37, 16.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1415/3000 [01:15<01:19, 19.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1418/3000 [01:16<01:23, 18.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1424/3000 [01:16<01:25, 18.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1427/3000 [01:16<01:18, 20.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1430/3000 [01:16<01:24, 18.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1436/3000 [01:17<01:27, 17.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1440/3000 [01:17<01:09, 22.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1446/3000 [01:17<01:17, 20.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1451/3000 [01:17<00:59, 25.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1454/3000 [01:18<01:41, 15.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [01:18<03:07, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1462/3000 [01:18<02:08, 12.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [01:19<01:44, 14.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1474/3000 [01:19<01:42, 14.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1477/3000 [01:19<01:33, 16.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1483/3000 [01:20<01:24, 17.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [01:20<01:22, 18.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1492/3000 [01:20<01:24, 17.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1498/3000 [01:20<01:23, 17.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1500/3000 [01:21<01:25, 17.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1508/3000 [01:21<01:14, 20.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1511/3000 [01:21<01:10, 21.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1517/3000 [01:21<01:14, 19.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1520/3000 [01:22<01:13, 20.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1527/3000 [01:22<01:19, 18.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1531/3000 [01:22<01:04, 22.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1538/3000 [01:22<01:10, 20.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1541/3000 [01:23<01:12, 20.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1547/3000 [01:23<01:17, 18.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1553/3000 [01:23<01:14, 19.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1556/3000 [01:24<01:20, 17.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1564/3000 [01:24<01:06, 21.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1570/3000 [01:24<01:10, 20.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1573/3000 [01:24<01:19, 17.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▎ | 1575/3000 [01:25<01:21, 17.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1579/3000 [01:25<01:42, 13.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1585/3000 [01:25<01:02, 22.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1588/3000 [01:25<01:28, 16.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1593/3000 [01:26<01:14, 18.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1598/3000 [01:26<01:29, 15.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1603/3000 [01:26<01:14, 18.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1611/3000 [01:26<01:01, 22.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1614/3000 [01:27<01:03, 21.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1617/3000 [01:27<01:27, 15.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [01:27<01:40, 13.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1625/3000 [01:27<01:12, 18.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1628/3000 [01:28<01:04, 21.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1634/3000 [01:28<01:24, 16.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1639/3000 [01:28<01:17, 17.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1642/3000 [01:28<01:24, 16.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1646/3000 [01:29<01:05, 20.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1649/3000 [01:29<01:08, 19.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1657/3000 [01:29<01:21, 16.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1661/3000 [01:30<01:16, 17.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1666/3000 [01:30<01:07, 19.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1669/3000 [01:30<01:09, 19.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1675/3000 [01:30<01:10, 18.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1681/3000 [01:31<01:07, 19.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1686/3000 [01:31<01:17, 17.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1690/3000 [01:31<01:03, 20.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1693/3000 [01:31<01:08, 19.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1696/3000 [01:31<01:20, 16.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1701/3000 [01:32<01:22, 15.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1703/3000 [01:32<01:28, 14.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1711/3000 [01:32<01:07, 19.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1717/3000 [01:33<01:15, 16.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1723/3000 [01:33<01:06, 19.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1726/3000 [01:33<01:01, 20.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1729/3000 [01:33<01:15, 16.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1734/3000 [01:34<01:26, 14.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1739/3000 [01:34<01:00, 20.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1743/3000 [01:34<00:53, 23.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1746/3000 [01:34<01:02, 20.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1751/3000 [01:35<01:17, 16.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1757/3000 [01:35<01:03, 19.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1760/3000 [01:35<00:59, 20.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1765/3000 [01:35<00:51, 23.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1771/3000 [01:36<01:21, 15.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1774/3000 [01:36<01:11, 17.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1781/3000 [01:36<00:51, 23.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1784/3000 [01:36<01:02, 19.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1790/3000 [01:37<01:18, 15.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1797/3000 [01:37<00:57, 20.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1803/3000 [01:38<01:01, 19.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1807/3000 [01:38<01:04, 18.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1812/3000 [01:38<01:14, 16.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1818/3000 [01:38<01:00, 19.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1822/3000 [01:39<00:50, 23.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1827/3000 [01:39<00:56, 20.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1832/3000 [01:39<01:24, 13.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1838/3000 [01:39<00:57, 20.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1844/3000 [01:40<00:57, 20.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1847/3000 [01:40<01:03, 18.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1852/3000 [01:40<01:08, 16.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1858/3000 [01:41<01:04, 17.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1862/3000 [01:41<00:57, 19.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1870/3000 [01:41<00:46, 24.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1873/3000 [01:41<01:03, 17.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1876/3000 [01:42<01:03, 17.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1879/3000 [01:42<01:06, 16.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1885/3000 [01:42<01:01, 18.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1889/3000 [01:42<00:51, 21.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1895/3000 [01:43<00:58, 18.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [01:43<01:05, 16.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1900/3000 [01:43<01:14, 14.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1903/3000 [01:43<01:10, 15.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1910/3000 [01:43<00:53, 20.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1913/3000 [01:44<00:48, 22.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1916/3000 [01:44<01:06, 16.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1922/3000 [01:44<01:00, 17.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1926/3000 [01:44<00:52, 20.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1931/3000 [01:45<00:45, 23.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1934/3000 [01:45<00:58, 18.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1942/3000 [01:45<00:53, 19.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1950/3000 [01:46<00:48, 21.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1953/3000 [01:46<00:46, 22.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1956/3000 [01:46<01:09, 14.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1961/3000 [01:46<01:04, 16.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1966/3000 [01:46<00:52, 19.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1969/3000 [01:47<01:01, 16.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1975/3000 [01:47<00:59, 17.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1981/3000 [01:47<00:46, 21.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1984/3000 [01:47<00:47, 21.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1987/3000 [01:48<01:08, 14.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1990/3000 [01:48<01:16, 13.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1996/3000 [01:48<00:51, 19.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2001/3000 [01:48<00:43, 23.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2005/3000 [01:49<00:58, 17.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2013/3000 [01:49<00:52, 18.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2020/3000 [01:50<00:48, 20.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2023/3000 [01:50<00:48, 20.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2026/3000 [01:50<00:53, 18.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2031/3000 [01:50<01:01, 15.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2036/3000 [01:51<00:56, 17.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2044/3000 [01:51<00:42, 22.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2047/3000 [01:51<00:58, 16.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2051/3000 [01:51<00:55, 17.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2057/3000 [01:52<00:50, 18.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2061/3000 [01:52<00:41, 22.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2064/3000 [01:52<00:48, 19.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2071/3000 [01:52<00:52, 17.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2077/3000 [01:53<00:43, 21.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2080/3000 [01:53<00:44, 20.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2083/3000 [01:53<00:57, 15.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2086/3000 [01:53<00:49, 18.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2094/3000 [01:54<00:44, 20.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2097/3000 [01:54<00:48, 18.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2100/3000 [01:54<00:49, 18.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2106/3000 [01:54<00:52, 17.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2109/3000 [01:55<00:53, 16.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2117/3000 [01:55<00:40, 21.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2123/3000 [01:55<00:39, 22.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2126/3000 [01:55<00:51, 17.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2128/3000 [01:56<00:52, 16.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2134/3000 [01:56<00:42, 20.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2138/3000 [01:56<00:43, 19.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2144/3000 [01:56<00:38, 22.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2147/3000 [01:56<00:45, 18.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2152/3000 [01:57<00:47, 17.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2155/3000 [01:57<00:43, 19.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2161/3000 [01:57<00:45, 18.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2165/3000 [01:57<00:38, 21.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2170/3000 [01:58<00:59, 13.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2176/3000 [01:58<00:47, 17.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2181/3000 [01:58<00:38, 21.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2184/3000 [01:59<00:40, 19.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2187/3000 [01:59<00:54, 15.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2195/3000 [01:59<00:38, 20.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2199/3000 [01:59<00:39, 20.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2202/3000 [01:59<00:39, 20.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [02:00<00:45, 17.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2207/3000 [02:00<00:57, 13.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2217/3000 [02:00<00:34, 22.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2220/3000 [02:00<00:37, 20.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2226/3000 [02:01<00:48, 15.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2231/3000 [02:01<00:44, 17.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2239/3000 [02:01<00:31, 24.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2242/3000 [02:02<00:46, 16.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2248/3000 [02:02<00:42, 17.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2251/3000 [02:02<00:39, 18.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2255/3000 [02:03<00:38, 19.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2260/3000 [02:03<00:50, 14.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2262/3000 [02:03<00:48, 15.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2270/3000 [02:03<00:36, 19.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2273/3000 [02:04<00:33, 21.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2276/3000 [02:04<00:37, 19.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2282/3000 [02:04<00:37, 19.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [02:04<00:34, 20.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2292/3000 [02:05<00:33, 20.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2295/3000 [02:05<00:49, 14.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2302/3000 [02:05<00:38, 18.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2308/3000 [02:05<00:34, 20.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2314/3000 [02:06<00:43, 15.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2321/3000 [02:06<00:31, 21.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2324/3000 [02:06<00:33, 20.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2327/3000 [02:07<00:36, 18.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2333/3000 [02:07<00:38, 17.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2335/3000 [02:07<00:42, 15.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2342/3000 [02:07<00:36, 18.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2345/3000 [02:08<00:33, 19.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2351/3000 [02:08<00:41, 15.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2356/3000 [02:08<00:33, 19.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2361/3000 [02:09<00:32, 19.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2364/3000 [02:09<00:40, 15.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2366/3000 [02:09<00:41, 15.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2369/3000 [02:09<00:37, 16.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2374/3000 [02:09<00:39, 15.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2381/3000 [02:10<00:27, 22.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2384/3000 [02:10<00:38, 16.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2387/3000 [02:10<00:38, 15.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2391/3000 [02:11<00:49, 12.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2399/3000 [02:11<00:29, 20.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2402/3000 [02:11<00:45, 13.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2406/3000 [02:11<00:38, 15.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2412/3000 [02:12<00:31, 18.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2418/3000 [02:12<00:27, 21.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2421/3000 [02:12<00:34, 16.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2424/3000 [02:12<00:30, 19.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2432/3000 [02:13<00:26, 21.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2435/3000 [02:13<00:34, 16.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2438/3000 [02:13<00:32, 17.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2444/3000 [02:13<00:31, 17.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2448/3000 [02:14<00:35, 15.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2454/3000 [02:14<00:25, 21.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2457/3000 [02:14<00:30, 18.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2459/3000 [02:14<00:30, 17.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2466/3000 [02:15<00:27, 19.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2471/3000 [02:15<00:30, 17.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▎ | 2475/3000 [02:15<00:27, 19.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2481/3000 [02:15<00:25, 20.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2487/3000 [02:16<00:31, 16.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2489/3000 [02:16<00:30, 16.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2495/3000 [02:16<00:29, 16.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2502/3000 [02:17<00:24, 20.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2505/3000 [02:17<00:27, 18.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2510/3000 [02:17<00:26, 18.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2514/3000 [02:17<00:29, 16.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [02:17<00:25, 18.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2523/3000 [02:18<00:21, 22.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2526/3000 [02:18<00:19, 23.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2532/3000 [02:18<00:25, 18.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2535/3000 [02:18<00:28, 16.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2539/3000 [02:19<00:27, 16.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2543/3000 [02:19<00:22, 20.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2546/3000 [02:19<00:42, 10.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2555/3000 [02:20<00:43, 10.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2558/3000 [02:21<00:35, 12.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2566/3000 [02:21<00:32, 13.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2573/3000 [02:21<00:26, 15.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2581/3000 [02:22<00:19, 21.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2584/3000 [02:22<00:21, 19.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2587/3000 [02:22<00:19, 20.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2593/3000 [02:22<00:20, 20.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2596/3000 [02:23<00:25, 15.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2601/3000 [02:23<00:22, 17.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2603/3000 [02:23<00:23, 17.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2607/3000 [02:23<00:27, 14.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2613/3000 [02:24<00:19, 20.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2620/3000 [02:24<00:20, 18.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2625/3000 [02:24<00:21, 17.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2630/3000 [02:25<00:18, 19.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2633/3000 [02:25<00:20, 18.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2639/3000 [02:25<00:19, 18.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2643/3000 [02:25<00:18, 19.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2647/3000 [02:25<00:15, 22.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2653/3000 [02:26<00:18, 18.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2655/3000 [02:26<00:22, 15.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [02:26<00:19, 17.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2666/3000 [02:27<00:17, 18.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2670/3000 [02:27<00:15, 21.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2673/3000 [02:27<00:16, 20.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2678/3000 [02:27<00:17, 18.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2684/3000 [02:28<00:18, 17.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2690/3000 [02:28<00:14, 21.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2693/3000 [02:28<00:14, 21.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [02:28<00:17, 17.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2700/3000 [02:29<00:19, 15.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2706/3000 [02:29<00:15, 18.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2709/3000 [02:29<00:16, 17.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2715/3000 [02:29<00:15, 18.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2718/3000 [02:30<00:16, 17.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2720/3000 [02:30<00:16, 17.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2725/3000 [02:30<00:15, 17.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2728/3000 [02:30<00:15, 17.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2733/3000 [02:30<00:14, 17.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2738/3000 [02:31<00:13, 19.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2740/3000 [02:31<00:14, 17.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2745/3000 [02:31<00:15, 16.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2749/3000 [02:31<00:15, 15.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2758/3000 [02:32<00:08, 27.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2761/3000 [02:32<00:11, 20.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2764/3000 [02:32<00:12, 19.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2772/3000 [02:32<00:11, 20.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2778/3000 [02:33<00:10, 21.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [02:33<00:09, 23.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2787/3000 [02:33<00:12, 16.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2790/3000 [02:33<00:12, 16.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2797/3000 [02:34<00:09, 20.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2801/3000 [02:34<00:08, 24.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2804/3000 [02:34<00:12, 15.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2809/3000 [02:35<00:12, 15.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2817/3000 [02:35<00:07, 24.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [02:35<00:06, 26.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2831/3000 [02:36<00:07, 22.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2838/3000 [02:36<00:07, 21.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2841/3000 [02:36<00:10, 15.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2849/3000 [02:37<00:07, 18.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2853/3000 [02:37<00:06, 22.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2857/3000 [02:37<00:08, 16.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2863/3000 [02:38<00:08, 15.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2871/3000 [02:38<00:06, 21.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2874/3000 [02:38<00:05, 21.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2877/3000 [02:38<00:07, 16.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2883/3000 [02:39<00:06, 18.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2889/3000 [02:39<00:06, 17.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2893/3000 [02:39<00:04, 21.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2896/3000 [02:39<00:05, 18.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2901/3000 [02:40<00:05, 16.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2903/3000 [02:40<00:06, 15.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2910/3000 [02:40<00:04, 19.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2917/3000 [02:40<00:04, 20.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2922/3000 [02:41<00:04, 18.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2927/3000 [02:41<00:04, 15.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2934/3000 [02:41<00:02, 22.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2937/3000 [02:42<00:03, 18.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2943/3000 [02:42<00:03, 18.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2946/3000 [02:42<00:02, 20.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2949/3000 [02:42<00:03, 16.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2955/3000 [02:43<00:02, 17.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2958/3000 [02:43<00:02, 19.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2964/3000 [02:43<00:02, 16.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2966/3000 [02:43<00:02, 12.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2968/3000 [02:44<00:02, 13.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2974/3000 [02:44<00:01, 19.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2977/3000 [02:44<00:01, 18.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2980/3000 [02:44<00:01, 19.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2985/3000 [02:44<00:01, 14.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2990/3000 [02:45<00:00, 15.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2998/3000 [02:45<00:00, 19.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|██████████| 3000/3000 [02:46<00:00, 18.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-13 09:52:20.045\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (before optimization): {'f1': 0.0, 'em': 0.0, 'acc': 0.49733333333333335}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "932092e0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.0, 'em': 0.0, 'acc': 0.49733333333333335}"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "b59c472f",
"metadata": {},
"outputs": [],
"source": [
"#generated_workflow\n",
"qa_workflow = {\n",
" \"goal\": \"Provide a concise answer to the question using relevant context. The answer must be straightforward and avoid unnecessary explanations.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"generate_answer\",\n",
" \"description\": \"Extract and formulate an answer from the given context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question that needs to be answered.\"},\n",
" {\"name\": \"context\", \"type\": \"str\", \"required\": True, \"description\": \"The background information pertinent to the question.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ]\n",
"}\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question\"])\n",
" return {\"question\": problem, 'context':''}"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "2c62cea3",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://tianyuliu-hua-raredisea-resource.cognitiveservices.azure.com/\"\n",
"os.environ[\"AZURE_OPENAI_KEY\"] = \"2pa9h2ZIN1lQepFWwYADlXIKIansa9KPhxMoumeGbRQ08f2uDTXiJQQJ99BKACHYHv6XJ3w3AAAAACOGsQIt\"\n",
"os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
"llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
" azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
" azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
" api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"\n",
"executor_llm = LiteLLM(config=llm_config)\n",
"optimizer_llm = LiteLLM(config=llm_config)\n",
"llm = executor_llm"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "d965a023",
"metadata": {},
"outputs": [],
"source": [
"\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(qa_workflow)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "67291165",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"3000"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._test_data)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "76b45e2f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-13 09:53:37.184\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 7/3000 [00:00<04:54, 10.16it/s]Task exception was never retrieved\n",
"future: exception=RuntimeError('Event loop is closed')>\n",
"Traceback (most recent call last):\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/tasks.py\", line 277, in __step\n",
" result = coro.send(None)\n",
" ^^^^^^^^^^^^^^^\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/utils.py\", line 873, in _client_async_logging_helper\n",
" GLOBAL_LOGGING_WORKER.ensure_initialized_and_enqueue(\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 322, in ensure_initialized_and_enqueue\n",
" self.enqueue(async_coroutine)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 131, in enqueue\n",
" self._queue.put_nowait(task)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 147, in put_nowait\n",
" self._wakeup_next(self._getters)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 63, in _wakeup_next\n",
" waiter.set_result(None)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 263, in set_result\n",
" self.__schedule_callbacks()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 173, in __schedule_callbacks\n",
" self._loop.call_soon(callback, self, context=ctx)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 762, in call_soon\n",
" self._check_closed()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 520, in _check_closed\n",
" raise RuntimeError('Event loop is closed')\n",
"RuntimeError: Event loop is closed\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 17/3000 [00:01<02:04, 23.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 27/3000 [00:01<02:38, 18.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 36/3000 [00:01<01:54, 25.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 44/3000 [00:02<02:30, 19.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 52/3000 [00:02<01:58, 24.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 56/3000 [00:02<01:57, 25.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 60/3000 [00:03<01:47, 27.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 64/3000 [00:03<02:45, 17.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 76/3000 [00:03<01:49, 26.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 84/3000 [00:04<02:11, 22.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 94/3000 [00:04<01:41, 28.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 98/3000 [00:04<01:50, 26.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 106/3000 [00:05<02:04, 23.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 114/3000 [00:05<01:45, 27.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 117/3000 [00:05<02:04, 23.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 124/3000 [00:05<02:05, 22.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 129/3000 [00:06<01:59, 24.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:06<01:50, 25.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 141/3000 [00:06<01:57, 24.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 147/3000 [00:06<01:57, 24.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:06<01:31, 30.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 157/3000 [00:07<01:56, 24.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 160/3000 [00:07<01:52, 25.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 163/3000 [00:07<02:25, 19.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 173/3000 [00:07<01:38, 28.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 177/3000 [00:08<01:55, 24.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 181/3000 [00:08<02:31, 18.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:08<02:27, 19.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 191/3000 [00:08<02:47, 16.75it/s]Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.773613764)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.652860819)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.732868999)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.623856131)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.656304304)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.094575545)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.990900447)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.629211527)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.690694346)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.712088349)])']\n",
"connector: \n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 198/3000 [00:09<02:33, 18.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 207/3000 [00:09<02:21, 19.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 217/3000 [00:10<01:58, 23.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 221/3000 [00:10<02:02, 22.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 224/3000 [00:10<01:59, 23.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 230/3000 [00:10<02:15, 20.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 236/3000 [00:11<02:09, 21.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 241/3000 [00:11<01:43, 26.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 244/3000 [00:11<01:57, 23.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 251/3000 [00:11<02:06, 21.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 258/3000 [00:11<01:47, 25.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 261/3000 [00:12<01:54, 23.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 268/3000 [01:00<2:13:44, 2.94s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 275/3000 [01:01<1:06:57, 1.47s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 283/3000 [01:01<31:27, 1.44it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 286/3000 [01:01<24:11, 1.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 292/3000 [01:02<13:47, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 295/3000 [01:02<10:52, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 300/3000 [01:02<06:59, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 303/3000 [01:02<05:48, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 309/3000 [01:03<04:19, 10.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 317/3000 [01:03<02:40, 16.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 322/3000 [01:03<02:16, 19.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 325/3000 [01:03<02:49, 15.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 336/3000 [01:04<01:57, 22.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 341/3000 [01:04<01:48, 24.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 344/3000 [01:04<02:06, 21.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 350/3000 [01:04<02:02, 21.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 357/3000 [01:05<01:25, 30.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 361/3000 [01:05<02:00, 21.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 368/3000 [01:05<01:54, 22.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 371/3000 [01:05<01:52, 23.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 378/3000 [01:05<01:30, 28.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [01:06<01:42, 25.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 388/3000 [01:06<02:03, 21.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 395/3000 [01:06<01:44, 24.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 401/3000 [01:07<02:10, 19.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 410/3000 [01:07<01:36, 26.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 414/3000 [01:07<01:35, 27.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 417/3000 [01:07<02:12, 19.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 423/3000 [01:08<02:17, 18.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 428/3000 [01:08<01:44, 24.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 435/3000 [01:08<01:48, 23.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 438/3000 [01:08<01:44, 24.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 444/3000 [01:09<01:58, 21.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 451/3000 [01:09<01:45, 24.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 459/3000 [01:09<01:30, 28.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 462/3000 [01:09<02:03, 20.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 465/3000 [01:09<02:01, 20.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 474/3000 [01:10<02:04, 20.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 479/3000 [01:10<01:40, 24.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 488/3000 [01:11<01:49, 23.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 493/3000 [01:11<01:44, 23.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 496/3000 [01:11<02:06, 19.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 505/3000 [01:11<01:46, 23.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [01:11<01:30, 27.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 516/3000 [01:12<01:48, 22.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 522/3000 [01:12<01:55, 21.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 528/3000 [01:12<01:25, 28.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 533/3000 [01:13<03:21, 12.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 534/3000 [02:00<2:46:18, 4.05s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 536/3000 [02:01<2:14:51, 3.28s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 543/3000 [02:01<1:02:15, 1.52s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 546/3000 [02:01<45:23, 1.11s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 553/3000 [02:02<22:51, 1.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 560/3000 [02:02<12:27, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 567/3000 [02:02<07:47, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 570/3000 [02:03<06:51, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 581/3000 [02:03<03:14, 12.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [02:03<03:20, 12.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 589/3000 [02:03<02:58, 13.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 598/3000 [02:04<01:55, 20.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 602/3000 [02:04<02:01, 19.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [02:04<02:04, 19.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 611/3000 [02:04<01:59, 19.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 618/3000 [02:05<01:37, 24.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 621/3000 [02:05<01:46, 22.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 629/3000 [02:05<01:36, 24.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 635/3000 [02:05<01:33, 25.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 641/3000 [02:05<01:35, 24.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 644/3000 [02:06<01:30, 25.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 650/3000 [02:06<01:51, 21.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [02:06<01:38, 23.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 658/3000 [02:06<01:55, 20.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 662/3000 [02:06<01:39, 23.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 671/3000 [02:07<01:47, 21.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 677/3000 [02:07<01:45, 21.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 680/3000 [02:07<01:51, 20.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [02:08<01:37, 23.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 692/3000 [02:08<02:01, 19.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Evaluating workflow: 23%|██▎ | 695/3000 [02:08<03:15, 11.79it/s]Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.780804795)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.71084237)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.853598557)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.761616557)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.10639073)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.756386062)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.601480969)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329380.718464456)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.612072981)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.418332547)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.670698469)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.850732692)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.567546099)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.773091948)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.871249242)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.824697288)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329381.7618284)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.753055645)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.549786978)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.135991343)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.246818432)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.532592386)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.608239653)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.423820769)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329382.708359432)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.543025858)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.239380108)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.273368386)])']\n",
"connector: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.446719745)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.615908616)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.935133384)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.294765826)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.336299225)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.369293271)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.349403765)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.360228839)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.418855196)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.167077464)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.16088939)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.02715133)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.078029417)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.343340354)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329383.985132912)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.175944279)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.074227041)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.210764081)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.700981687)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329385.024579639)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.987621019)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.708219194)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.69529935)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.668438174)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329385.447974777)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329384.745162373)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329385.307972811)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329385.848449036)])']\n",
"connector: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.319849802)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329385.95288757)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.021394549)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.541332586)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.398844574)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.92931504)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.944657147)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.589441643)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.612324537)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329387.184095423)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.693185195)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.585420949)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329386.569194199)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329387.26577572)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2329387.52878922)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: