{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"id": "2a8e89d4",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "54f40417",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from evoagentx.benchmark import PertQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "f736c579",
"metadata": {},
"outputs": [],
"source": [
"from evoagentx.benchmark import MolQA"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "54fa1aa5",
"metadata": {},
"outputs": [],
"source": [
"from evoagentx.benchmark import MolQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question\"])\n",
" return {\"problem\": problem}\n",
"\n",
"\n",
"hotpotqa_graph_data = {\n",
" \"goal\": \"Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"answer_generate\",\n",
" \"description\": \"Answer the question based on the context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"problem\", \"type\": \"str\", \"required\": True, \"description\": \"The problem to solve.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The answer to the problem.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1ebace55",
"metadata": {},
"outputs": [],
"source": [
"api_key = \"\"\n",
"# Function to encode the image\n",
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = api_key\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
"\n",
"\n",
"llm_config = OpenAILLMConfig(model=\"gpt-4o-mini-2024-07-18\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"llm = OpenAILLM(config=llm_config)\n",
"executor_llm = OpenAILLM(config=llm_config)\n",
"optimizer_llm = OpenAILLM(config=llm_config)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "20e078fa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 15:56:23.788\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-10 15:56:23.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-10 15:56:23.967\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_test.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = MolQA()\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "b485d4ad",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"3000"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._test_data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "78d5904e",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# len(benchmark._fulldata)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e9722789",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-09 21:40:47.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n",
"Evaluating workflow: 0%| | 1/3000 [00:05<4:16:44, 5.14s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 2/3000 [00:05<1:54:43, 2.30s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 4/3000 [00:05<44:26, 1.12it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 5/3000 [00:05<30:18, 1.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 9/3000 [00:06<14:11, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 11/3000 [00:06<12:21, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 12/3000 [00:07<16:14, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 15/3000 [00:07<11:01, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 16/3000 [00:08<10:59, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 17/3000 [00:08<12:00, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 18/3000 [00:09<16:30, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 19/3000 [00:09<15:43, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 20/3000 [00:09<15:31, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 22/3000 [00:09<12:24, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 23/3000 [00:10<12:00, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 24/3000 [00:10<15:19, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 28/3000 [00:10<07:21, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 29/3000 [00:11<09:03, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 30/3000 [00:11<09:48, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 31/3000 [00:11<12:54, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 33/3000 [00:12<16:46, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 35/3000 [00:13<13:35, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 36/3000 [00:13<12:33, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 37/3000 [00:13<12:59, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 39/3000 [00:13<09:54, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 41/3000 [00:14<12:04, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 42/3000 [00:15<14:06, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 44/3000 [00:15<11:13, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 46/3000 [00:15<10:59, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 47/3000 [00:15<09:49, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 50/3000 [00:16<08:30, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 52/3000 [00:16<10:44, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 56/3000 [00:18<15:13, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 58/3000 [00:18<11:40, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 59/3000 [00:19<11:06, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 60/3000 [00:19<12:30, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 61/3000 [00:19<12:00, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 64/3000 [00:20<12:22, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 66/3000 [00:20<11:56, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 68/3000 [00:21<08:59, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 71/3000 [00:21<06:21, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 73/3000 [00:21<06:46, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 74/3000 [00:22<12:46, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▎ | 75/3000 [00:22<16:55, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 76/3000 [00:23<16:06, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 77/3000 [00:23<14:56, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 79/3000 [00:23<13:29, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 81/3000 [00:24<09:17, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 84/3000 [00:24<06:24, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 86/3000 [00:24<07:54, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 88/3000 [00:25<11:02, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 89/3000 [00:25<10:03, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 91/3000 [00:26<14:11, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 93/3000 [00:27<17:27, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 94/3000 [00:27<17:04, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 95/3000 [00:28<19:12, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 96/3000 [00:28<17:52, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 97/3000 [00:28<17:02, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 101/3000 [00:29<08:41, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 102/3000 [00:29<11:58, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 104/3000 [00:30<09:41, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:30<11:02, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:30<08:48, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 108/3000 [00:31<11:17, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:31<11:12, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 112/3000 [00:32<11:58, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 115/3000 [00:32<10:08, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 118/3000 [00:32<06:40, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 120/3000 [00:33<08:22, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:33<08:54, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 122/3000 [00:33<09:16, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 123/3000 [00:34<12:00, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 125/3000 [00:34<11:03, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 126/3000 [00:35<15:19, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 129/3000 [00:35<09:32, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 130/3000 [00:36<12:29, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 131/3000 [00:36<11:46, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:36<10:19, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 135/3000 [00:37<11:21, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 136/3000 [00:37<10:52, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 138/3000 [00:38<12:35, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 140/3000 [00:38<08:29, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 143/3000 [00:38<08:17, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 144/3000 [00:39<09:02, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 145/3000 [00:39<10:46, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 146/3000 [00:39<10:31, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 148/3000 [00:39<08:28, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 149/3000 [00:40<09:50, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 150/3000 [00:40<10:22, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 152/3000 [00:41<12:47, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:41<14:10, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 155/3000 [00:41<10:08, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 158/3000 [00:42<08:31, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 161/3000 [00:42<08:42, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 163/3000 [00:43<12:12, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 166/3000 [00:44<12:40, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 167/3000 [00:44<11:04, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 169/3000 [00:45<10:43, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 171/3000 [00:45<10:50, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 176/3000 [00:46<10:28, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 178/3000 [00:47<08:47, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 180/3000 [00:47<10:12, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 183/3000 [00:47<06:39, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 186/3000 [00:49<13:12, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 189/3000 [00:49<09:05, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 191/3000 [00:49<07:25, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 192/3000 [00:50<11:27, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 193/3000 [00:50<14:00, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 196/3000 [00:51<09:37, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 198/3000 [00:51<07:54, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 199/3000 [00:52<10:17, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 202/3000 [00:52<10:24, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 203/3000 [00:53<10:33, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 204/3000 [00:53<12:34, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 206/3000 [00:53<11:57, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 208/3000 [00:54<09:57, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 209/3000 [00:54<11:35, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 213/3000 [00:55<07:33, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 214/3000 [00:55<09:03, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 216/3000 [00:56<11:55, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 221/3000 [00:56<07:39, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 223/3000 [00:57<10:48, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 226/3000 [00:58<09:16, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 227/3000 [00:58<08:22, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 229/3000 [00:58<10:32, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 231/3000 [00:59<11:57, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 232/3000 [01:00<15:25, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 234/3000 [01:00<13:11, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 236/3000 [01:00<09:47, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 237/3000 [01:01<09:19, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 238/3000 [01:01<09:37, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 241/3000 [01:01<08:39, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 242/3000 [01:02<08:45, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 243/3000 [01:02<11:47, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 244/3000 [01:02<12:00, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 246/3000 [01:03<09:36, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 248/3000 [01:03<10:38, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 249/3000 [01:03<11:23, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 250/3000 [01:04<16:31, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 252/3000 [01:04<12:26, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 254/3000 [01:05<12:46, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 256/3000 [01:05<10:47, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 260/3000 [01:06<07:14, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 262/3000 [01:06<08:44, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 264/3000 [01:06<06:34, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 265/3000 [01:07<08:05, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 268/3000 [01:08<09:28, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 269/3000 [01:08<09:19, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 270/3000 [01:08<10:33, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 271/3000 [01:09<15:41, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 274/3000 [01:09<11:18, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [01:10<09:51, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 281/3000 [01:10<06:51, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 282/3000 [01:11<09:39, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 284/3000 [01:11<08:54, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 285/3000 [01:11<10:07, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 287/3000 [01:12<08:27, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 288/3000 [01:12<09:56, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 289/3000 [01:13<18:04, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 290/3000 [01:13<16:00, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 292/3000 [01:14<11:33, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 294/3000 [01:14<09:20, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 296/3000 [01:14<08:14, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 297/3000 [01:14<08:44, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 299/3000 [01:15<08:52, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 302/3000 [01:15<08:02, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 303/3000 [01:16<09:11, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [01:16<06:55, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 308/3000 [01:17<11:53, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [01:17<11:47, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 310/3000 [01:17<12:05, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 312/3000 [01:18<14:25, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 314/3000 [01:19<10:53, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 317/3000 [01:19<07:07, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 319/3000 [01:19<06:28, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 321/3000 [01:20<07:18, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 324/3000 [01:20<07:09, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 326/3000 [01:20<06:42, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 327/3000 [01:21<09:10, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 328/3000 [01:22<19:07, 2.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 330/3000 [01:22<14:39, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 331/3000 [01:23<14:52, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 333/3000 [01:23<10:44, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 335/3000 [01:23<10:02, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 338/3000 [01:24<07:26, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 341/3000 [01:24<06:32, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 343/3000 [01:24<05:00, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 345/3000 [01:25<10:10, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 346/3000 [01:27<22:08, 2.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 348/3000 [01:27<17:11, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 350/3000 [01:28<12:18, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 352/3000 [01:28<08:49, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 355/3000 [01:28<07:20, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 357/3000 [01:28<08:03, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 358/3000 [01:29<08:41, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 360/3000 [01:29<07:23, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 361/3000 [01:30<12:16, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 362/3000 [01:30<11:33, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 364/3000 [01:30<08:54, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 365/3000 [01:31<12:03, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 366/3000 [01:32<19:57, 2.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 370/3000 [01:32<12:28, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 373/3000 [01:33<09:36, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▎ | 375/3000 [01:33<08:01, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 377/3000 [01:33<08:00, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 378/3000 [01:34<08:07, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 380/3000 [01:34<08:56, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 383/3000 [01:34<06:36, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 386/3000 [01:36<12:21, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 387/3000 [01:36<16:22, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 389/3000 [01:37<12:47, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 392/3000 [01:37<09:53, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 393/3000 [01:38<10:25, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 395/3000 [01:38<09:01, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 397/3000 [01:38<09:30, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 400/3000 [01:39<08:38, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:39<10:26, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 402/3000 [01:40<13:59, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 404/3000 [01:40<10:41, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 407/3000 [01:40<08:05, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:41<10:21, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 410/3000 [01:41<10:03, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 411/3000 [01:42<09:48, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 412/3000 [01:42<10:43, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 415/3000 [01:42<06:45, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 419/3000 [01:43<06:19, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 420/3000 [01:43<06:59, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 422/3000 [01:44<11:16, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 423/3000 [01:44<10:20, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 425/3000 [01:45<09:28, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 426/3000 [01:45<11:13, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 428/3000 [01:46<14:02, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 431/3000 [01:46<07:51, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 432/3000 [01:46<07:45, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 433/3000 [01:47<10:49, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 435/3000 [01:48<12:51, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 438/3000 [01:48<07:26, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 439/3000 [01:49<12:57, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 441/3000 [01:49<11:37, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 443/3000 [01:49<08:34, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 445/3000 [01:50<10:23, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 446/3000 [01:50<09:46, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 447/3000 [01:51<12:56, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 448/3000 [01:51<12:51, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 453/3000 [01:52<06:24, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 455/3000 [01:52<06:25, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 456/3000 [01:52<07:55, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 458/3000 [01:53<10:17, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 460/3000 [01:54<15:11, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 461/3000 [01:55<15:54, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 464/3000 [01:55<11:00, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 466/3000 [01:56<11:15, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 468/3000 [01:56<09:12, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 471/3000 [01:56<07:12, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 472/3000 [01:57<08:01, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 473/3000 [01:57<08:38, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 475/3000 [01:57<08:10, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 478/3000 [01:58<07:26, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 479/3000 [01:58<07:38, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 480/3000 [01:58<11:12, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 481/3000 [01:59<11:27, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 483/3000 [01:59<10:14, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 484/3000 [01:59<09:16, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [02:00<14:06, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 488/3000 [02:01<11:58, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 489/3000 [02:01<15:55, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 491/3000 [02:02<11:56, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 492/3000 [02:02<12:11, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 493/3000 [02:02<13:00, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 495/3000 [02:03<10:24, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 497/3000 [02:03<09:10, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 498/3000 [02:03<09:24, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 499/3000 [02:04<09:14, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 500/3000 [02:04<12:30, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 502/3000 [02:04<10:02, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 503/3000 [02:05<10:35, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 504/3000 [02:05<10:22, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 505/3000 [02:05<12:52, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 506/3000 [02:06<12:07, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 509/3000 [02:06<10:47, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 511/3000 [02:07<07:40, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 514/3000 [02:07<06:01, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 516/3000 [02:07<06:33, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 517/3000 [02:07<06:26, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 519/3000 [02:08<11:11, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 520/3000 [02:08<10:17, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 521/3000 [02:09<10:20, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 523/3000 [02:09<09:28, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 525/3000 [02:10<09:00, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 527/3000 [02:10<07:21, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 529/3000 [02:10<09:29, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 530/3000 [02:11<08:47, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 533/3000 [02:11<09:05, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 535/3000 [02:12<08:04, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 537/3000 [02:12<09:41, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 538/3000 [02:13<10:42, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 540/3000 [02:13<12:16, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 542/3000 [02:14<10:08, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 543/3000 [02:14<11:46, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 545/3000 [02:15<10:21, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [02:15<08:49, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 549/3000 [02:16<11:46, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 551/3000 [02:16<10:35, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 554/3000 [02:16<06:29, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 557/3000 [02:17<06:58, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 558/3000 [02:17<06:57, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 559/3000 [02:18<11:00, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 560/3000 [02:18<13:18, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [02:18<12:29, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 563/3000 [02:19<10:25, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 565/3000 [02:19<08:56, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 569/3000 [02:20<07:06, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 571/3000 [02:20<07:11, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 574/3000 [02:21<09:55, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 576/3000 [02:22<09:06, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 578/3000 [02:22<07:48, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [02:22<08:01, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 584/3000 [02:23<05:39, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [02:23<06:07, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 586/3000 [02:23<06:47, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 587/3000 [02:24<10:43, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 589/3000 [02:24<09:34, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 591/3000 [02:25<14:48, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 594/3000 [02:26<08:19, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 596/3000 [02:26<08:41, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 599/3000 [02:27<06:19, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 601/3000 [02:27<06:34, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 603/3000 [02:27<08:05, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 604/3000 [02:28<11:27, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 606/3000 [02:28<09:25, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 608/3000 [02:29<09:26, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 609/3000 [02:29<10:12, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 611/3000 [02:30<11:38, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 613/3000 [02:30<09:52, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 615/3000 [02:31<08:12, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 617/3000 [02:31<09:25, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 619/3000 [02:32<08:25, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 620/3000 [02:32<07:57, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 621/3000 [02:32<11:59, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 623/3000 [02:33<09:28, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 624/3000 [02:33<12:04, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 625/3000 [02:33<11:24, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 626/3000 [02:34<10:44, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [02:34<14:31, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 629/3000 [02:35<13:37, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 632/3000 [02:35<09:40, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 633/3000 [02:36<10:11, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 634/3000 [02:36<13:29, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 636/3000 [02:37<11:28, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 639/3000 [02:37<07:37, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 640/3000 [02:37<08:22, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 641/3000 [02:38<09:02, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 642/3000 [02:38<11:03, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 645/3000 [02:38<07:54, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [02:39<07:18, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 647/3000 [02:39<08:48, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 651/3000 [02:40<09:42, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 652/3000 [02:40<09:20, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 654/3000 [02:41<08:54, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [02:41<07:44, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 657/3000 [02:41<07:01, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 659/3000 [02:42<10:40, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 662/3000 [02:42<07:06, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 663/3000 [02:43<06:51, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 665/3000 [02:43<07:22, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 668/3000 [02:44<09:50, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 669/3000 [02:44<10:26, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 670/3000 [02:45<11:00, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 672/3000 [02:45<08:49, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 676/3000 [02:46<06:39, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 677/3000 [02:46<07:24, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 679/3000 [02:47<08:28, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 681/3000 [02:47<08:17, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [02:47<07:14, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 685/3000 [02:48<08:48, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [02:48<11:31, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 687/3000 [02:49<11:40, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 688/3000 [02:49<12:50, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 690/3000 [02:50<13:01, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 692/3000 [02:50<10:02, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 694/3000 [02:51<09:16, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 696/3000 [02:51<06:54, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 697/3000 [02:51<07:07, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 700/3000 [02:52<06:56, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 701/3000 [02:52<06:39, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 704/3000 [02:53<09:23, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [02:53<09:24, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 706/3000 [02:53<09:52, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 708/3000 [02:54<09:26, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 710/3000 [02:55<11:17, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 713/3000 [02:55<08:59, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 716/3000 [02:56<06:23, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 718/3000 [02:56<04:48, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 721/3000 [02:56<05:33, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 722/3000 [02:57<06:59, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 725/3000 [02:57<08:16, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 726/3000 [02:58<10:44, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 727/3000 [02:58<10:20, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 728/3000 [02:59<15:23, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 730/3000 [02:59<12:39, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 731/3000 [03:00<11:36, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [03:00<12:45, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 734/3000 [03:00<09:38, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 738/3000 [03:01<05:47, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 739/3000 [03:01<05:47, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 740/3000 [03:01<08:53, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 742/3000 [03:02<07:00, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 743/3000 [03:02<08:56, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 744/3000 [03:02<09:25, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 745/3000 [03:03<11:57, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 747/3000 [03:04<13:11, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 749/3000 [03:04<10:19, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 752/3000 [03:05<08:15, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 755/3000 [03:05<06:26, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 757/3000 [03:06<06:04, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 758/3000 [03:06<05:32, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 762/3000 [03:06<04:39, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 765/3000 [03:08<09:42, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 767/3000 [03:08<07:50, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 768/3000 [03:08<08:49, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 769/3000 [03:09<16:52, 2.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 772/3000 [03:10<12:21, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 774/3000 [03:10<09:02, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 776/3000 [03:11<08:52, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 777/3000 [03:11<08:11, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 780/3000 [03:11<05:33, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 782/3000 [03:12<06:18, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 785/3000 [03:12<08:04, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 786/3000 [03:13<07:37, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 787/3000 [03:13<11:34, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 788/3000 [03:14<11:32, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 789/3000 [03:14<14:49, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 790/3000 [03:15<14:20, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 791/3000 [03:15<13:40, 2.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [03:15<14:45, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 795/3000 [03:16<09:10, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 797/3000 [03:16<08:45, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 799/3000 [03:16<06:57, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [03:17<07:15, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 802/3000 [03:17<06:19, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [03:17<07:41, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 805/3000 [03:18<08:40, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 806/3000 [03:18<07:46, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 808/3000 [03:18<07:40, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 809/3000 [03:19<10:52, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 812/3000 [03:20<09:04, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 815/3000 [03:20<07:59, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 818/3000 [03:21<05:56, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 820/3000 [03:21<07:56, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 822/3000 [03:22<08:04, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 823/3000 [03:22<07:04, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 825/3000 [03:23<08:49, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 827/3000 [03:23<07:21, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 828/3000 [03:23<08:12, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 829/3000 [03:23<08:06, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 830/3000 [03:24<09:55, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 832/3000 [03:24<07:47, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [03:24<08:17, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 836/3000 [03:25<09:27, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 837/3000 [03:26<09:16, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 841/3000 [03:26<07:07, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 843/3000 [03:27<07:32, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 844/3000 [03:27<08:33, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 846/3000 [03:28<09:24, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 847/3000 [03:28<10:23, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 849/3000 [03:28<08:45, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 852/3000 [03:29<08:06, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 853/3000 [03:29<08:37, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 855/3000 [03:30<07:55, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 858/3000 [03:30<05:06, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 859/3000 [03:30<06:57, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 860/3000 [03:31<07:50, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 862/3000 [03:31<08:39, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 863/3000 [03:32<09:36, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 864/3000 [03:32<10:03, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 866/3000 [03:32<08:16, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 868/3000 [03:33<08:34, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 869/3000 [03:34<13:25, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 876/3000 [03:34<06:08, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 878/3000 [03:35<06:32, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 880/3000 [03:35<07:30, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 882/3000 [03:36<08:47, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 885/3000 [03:37<06:09, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [03:37<06:59, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 888/3000 [03:37<06:51, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 890/3000 [03:38<11:12, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [03:38<09:32, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 892/3000 [03:39<09:59, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 894/3000 [03:40<11:46, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 896/3000 [03:40<09:25, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 899/3000 [03:40<05:55, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 902/3000 [03:41<06:04, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 905/3000 [03:42<05:58, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 907/3000 [03:42<05:07, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 908/3000 [03:42<06:49, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 910/3000 [03:43<07:17, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 911/3000 [03:43<09:27, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 912/3000 [03:43<10:00, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 914/3000 [03:44<09:15, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 915/3000 [03:45<17:03, 2.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 916/3000 [03:45<14:26, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 917/3000 [03:46<12:28, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 920/3000 [03:46<07:24, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 922/3000 [03:47<08:42, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 923/3000 [03:47<07:24, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 926/3000 [03:47<05:30, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 928/3000 [03:47<05:27, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 931/3000 [03:48<05:35, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 933/3000 [03:48<04:33, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 934/3000 [03:49<11:31, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 936/3000 [03:50<12:49, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 939/3000 [03:50<08:20, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 941/3000 [03:51<08:23, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 942/3000 [03:51<09:43, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 943/3000 [03:52<09:04, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 945/3000 [03:52<09:26, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 946/3000 [03:52<08:54, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 948/3000 [03:53<06:34, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 950/3000 [03:53<05:59, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 951/3000 [03:53<06:25, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 952/3000 [03:54<07:58, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 953/3000 [03:54<10:34, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 954/3000 [03:54<10:25, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 955/3000 [03:55<09:59, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 957/3000 [03:55<10:15, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 958/3000 [03:56<08:42, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 959/3000 [03:56<11:31, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 960/3000 [03:56<11:04, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 963/3000 [03:57<07:26, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 966/3000 [03:57<04:57, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 968/3000 [03:57<04:18, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [03:58<07:17, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 971/3000 [03:58<08:49, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 972/3000 [03:59<09:09, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 976/3000 [04:00<09:29, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 977/3000 [04:00<08:53, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 978/3000 [04:01<10:21, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 980/3000 [04:01<08:16, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 983/3000 [04:01<05:57, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 984/3000 [04:03<12:13, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 986/3000 [04:03<10:03, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 987/3000 [04:03<09:46, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 988/3000 [04:04<13:06, 2.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 991/3000 [04:05<11:47, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 993/3000 [04:05<10:11, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 996/3000 [04:06<07:45, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 997/3000 [04:06<09:50, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1000/3000 [04:07<06:36, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1002/3000 [04:07<07:37, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1004/3000 [04:08<08:10, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1007/3000 [04:08<07:08, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1008/3000 [04:09<10:38, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1009/3000 [04:10<12:40, 2.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1011/3000 [04:10<10:23, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1013/3000 [04:11<10:10, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1015/3000 [04:11<09:49, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1018/3000 [04:12<06:52, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [04:12<07:14, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1021/3000 [04:13<07:42, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1023/3000 [04:13<06:51, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1026/3000 [04:14<07:36, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1028/3000 [04:14<08:06, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1029/3000 [04:15<08:34, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1032/3000 [04:16<08:43, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1033/3000 [04:16<09:43, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1034/3000 [04:16<10:15, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1035/3000 [04:17<12:56, 2.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1036/3000 [04:17<11:51, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1038/3000 [04:18<09:00, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1040/3000 [04:18<09:58, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1042/3000 [04:19<09:43, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1046/3000 [04:19<05:47, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1047/3000 [04:20<08:23, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1048/3000 [04:20<09:38, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1050/3000 [04:21<07:42, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1051/3000 [04:21<07:44, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1052/3000 [04:21<08:03, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1054/3000 [04:22<06:44, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1056/3000 [04:23<11:03, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1058/3000 [04:23<11:54, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [04:24<14:02, 2.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1061/3000 [04:24<10:38, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1062/3000 [04:25<14:20, 2.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1066/3000 [04:26<08:21, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1067/3000 [04:26<07:23, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1072/3000 [04:27<06:58, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1075/3000 [04:28<06:25, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1077/3000 [04:29<12:57, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1078/3000 [04:29<11:14, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1080/3000 [04:30<10:27, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1082/3000 [04:31<10:30, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1083/3000 [04:31<10:44, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1084/3000 [04:32<11:49, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1086/3000 [04:32<10:32, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1087/3000 [04:32<10:40, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1090/3000 [04:33<07:50, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1092/3000 [04:34<07:58, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1093/3000 [04:34<09:09, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1095/3000 [04:34<07:16, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1096/3000 [04:34<07:17, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1099/3000 [04:35<06:04, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [04:36<12:12, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1102/3000 [04:36<08:49, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1104/3000 [04:37<08:58, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1106/3000 [04:37<08:25, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1108/3000 [04:38<06:54, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1110/3000 [04:38<08:28, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1112/3000 [04:39<09:08, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [04:39<09:31, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1115/3000 [04:40<07:47, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1118/3000 [04:40<07:22, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1119/3000 [04:41<06:40, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1120/3000 [04:41<06:35, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1121/3000 [04:41<07:38, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1123/3000 [04:42<08:40, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1124/3000 [04:42<10:21, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1126/3000 [04:43<09:49, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1129/3000 [04:43<06:59, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1131/3000 [04:44<06:02, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1133/3000 [04:45<10:23, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1134/3000 [04:45<12:10, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1136/3000 [04:46<09:11, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1139/3000 [04:46<05:48, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1141/3000 [04:46<05:15, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1143/3000 [04:47<06:15, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1144/3000 [04:47<06:25, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1145/3000 [04:48<08:23, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1146/3000 [04:48<10:46, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1148/3000 [04:49<09:53, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1149/3000 [04:50<12:57, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1151/3000 [04:50<12:04, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1153/3000 [04:50<08:52, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1156/3000 [04:51<06:28, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1158/3000 [04:51<06:00, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1159/3000 [04:51<06:25, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1161/3000 [04:52<09:10, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1162/3000 [04:53<08:43, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1164/3000 [04:53<06:24, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1166/3000 [04:54<09:37, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1167/3000 [04:54<08:31, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [04:54<08:32, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [04:55<06:10, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1172/3000 [04:55<07:01, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1175/3000 [04:56<05:01, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1177/3000 [04:56<05:25, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1178/3000 [04:56<06:47, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1179/3000 [04:57<07:54, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1181/3000 [04:57<07:00, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1183/3000 [04:57<04:38, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1184/3000 [04:58<10:43, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1186/3000 [04:59<08:56, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1188/3000 [05:00<10:46, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1190/3000 [05:00<09:53, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1191/3000 [05:00<08:04, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1192/3000 [05:01<08:16, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1194/3000 [05:01<07:10, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1195/3000 [05:01<09:07, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1199/3000 [05:02<07:35, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1200/3000 [05:03<08:44, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1202/3000 [05:03<07:45, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1204/3000 [05:04<06:22, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [05:04<06:21, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1207/3000 [05:05<08:27, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1210/3000 [05:06<10:52, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1211/3000 [05:07<13:06, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1215/3000 [05:07<07:26, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1217/3000 [05:07<05:56, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1218/3000 [05:08<06:01, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1220/3000 [05:09<09:32, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1221/3000 [05:09<08:25, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [05:09<08:08, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1223/3000 [05:09<08:03, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1226/3000 [05:10<06:29, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1228/3000 [05:10<05:50, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1229/3000 [05:11<09:34, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1232/3000 [05:12<06:28, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1234/3000 [05:13<09:06, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1235/3000 [05:14<13:14, 2.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1236/3000 [05:14<12:29, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1239/3000 [05:15<08:57, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1240/3000 [05:16<13:39, 2.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1243/3000 [05:16<09:41, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1245/3000 [05:17<07:44, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1246/3000 [05:17<08:07, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1247/3000 [05:18<12:05, 2.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1248/3000 [05:18<10:42, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1249/3000 [05:19<14:46, 1.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1250/3000 [05:20<17:27, 1.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1252/3000 [05:20<11:50, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1253/3000 [05:21<12:20, 2.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1254/3000 [05:21<12:13, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1255/3000 [05:21<11:36, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1257/3000 [05:22<08:18, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1259/3000 [05:22<07:34, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1262/3000 [05:23<06:12, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1263/3000 [05:23<06:28, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1264/3000 [05:24<10:15, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1266/3000 [05:24<08:19, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1268/3000 [05:25<09:19, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1269/3000 [05:25<08:30, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1270/3000 [05:25<08:05, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1272/3000 [05:26<09:43, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▎ | 1275/3000 [05:27<08:53, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1276/3000 [05:28<10:42, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1277/3000 [05:28<11:41, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1281/3000 [05:29<06:35, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1282/3000 [05:29<07:51, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1283/3000 [05:30<08:52, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1284/3000 [05:30<09:57, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1285/3000 [05:30<10:19, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1287/3000 [05:32<12:45, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1288/3000 [05:32<11:57, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1293/3000 [05:33<06:11, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1295/3000 [05:33<05:25, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1298/3000 [05:34<05:28, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1301/3000 [05:34<06:15, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1303/3000 [05:35<05:41, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1304/3000 [05:35<07:25, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1305/3000 [05:36<11:41, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1307/3000 [05:37<12:37, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1310/3000 [05:38<07:50, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1312/3000 [05:38<07:05, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1315/3000 [05:39<06:01, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1320/3000 [05:39<04:58, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1322/3000 [05:40<04:39, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1323/3000 [05:41<11:01, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1324/3000 [05:41<10:17, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1325/3000 [05:42<11:51, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1328/3000 [05:42<08:50, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1331/3000 [05:43<06:23, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1334/3000 [05:43<04:03, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1336/3000 [05:43<04:02, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1338/3000 [05:44<04:54, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1340/3000 [05:45<09:00, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1341/3000 [05:45<09:20, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1342/3000 [05:46<12:17, 2.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1344/3000 [05:47<09:15, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1346/3000 [05:47<07:52, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1348/3000 [05:47<06:58, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1350/3000 [05:48<05:39, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1352/3000 [05:48<05:05, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1353/3000 [05:48<05:50, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1354/3000 [05:48<05:54, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1357/3000 [05:49<05:00, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1359/3000 [05:50<06:32, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1360/3000 [05:50<06:36, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1362/3000 [05:50<06:49, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1364/3000 [05:51<08:56, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1365/3000 [05:52<07:54, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1367/3000 [05:52<06:31, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1369/3000 [05:53<08:03, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1371/3000 [05:53<06:41, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1372/3000 [05:54<08:04, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1373/3000 [05:54<08:13, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1375/3000 [05:54<06:22, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1376/3000 [05:55<07:58, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1377/3000 [05:55<09:09, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1379/3000 [05:55<06:45, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1381/3000 [05:56<05:16, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1383/3000 [05:56<07:23, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1385/3000 [05:57<05:08, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1386/3000 [05:57<06:28, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1388/3000 [05:58<06:47, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [05:58<05:54, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1391/3000 [05:58<07:00, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1392/3000 [05:59<09:31, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1394/3000 [06:00<08:28, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1395/3000 [06:00<08:05, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1396/3000 [06:00<08:05, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1398/3000 [06:01<08:51, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1400/3000 [06:01<08:08, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [06:02<07:55, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [06:02<05:45, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1404/3000 [06:02<06:28, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1406/3000 [06:03<05:56, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1407/3000 [06:03<09:52, 2.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1409/3000 [06:04<08:04, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1410/3000 [06:04<07:13, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1413/3000 [06:04<04:41, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1415/3000 [06:05<03:25, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1418/3000 [06:06<07:34, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1422/3000 [06:07<05:48, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1424/3000 [06:07<05:58, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1426/3000 [06:08<05:14, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1428/3000 [06:08<06:07, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1429/3000 [06:09<06:42, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1430/3000 [06:09<08:13, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1432/3000 [06:10<07:31, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1434/3000 [06:10<05:31, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1435/3000 [06:10<08:05, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1438/3000 [06:11<06:32, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1439/3000 [06:12<08:17, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1441/3000 [06:12<06:51, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1443/3000 [06:12<04:32, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1444/3000 [06:12<05:44, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1447/3000 [06:13<05:00, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1448/3000 [06:13<05:20, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1450/3000 [06:14<07:26, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1452/3000 [06:15<07:42, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1454/3000 [06:15<07:35, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1455/3000 [06:16<08:08, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [06:17<09:25, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1459/3000 [06:17<07:36, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1461/3000 [06:18<08:13, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1464/3000 [06:18<06:12, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1465/3000 [06:19<06:22, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1468/3000 [06:19<04:48, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1470/3000 [06:19<03:59, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1471/3000 [06:19<04:14, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1472/3000 [06:20<09:06, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1474/3000 [06:21<07:10, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1476/3000 [06:21<07:17, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1477/3000 [06:22<06:20, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1479/3000 [06:22<05:14, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1480/3000 [06:22<06:53, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1481/3000 [06:23<07:18, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1482/3000 [06:23<08:23, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1485/3000 [06:24<05:27, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [06:24<05:39, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1488/3000 [06:24<05:41, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1489/3000 [06:24<05:01, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1491/3000 [06:25<05:40, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1492/3000 [06:25<06:47, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1494/3000 [06:26<06:46, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1495/3000 [06:26<06:46, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1498/3000 [06:27<04:44, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1500/3000 [06:27<05:20, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1501/3000 [06:27<05:19, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1502/3000 [06:28<10:07, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1505/3000 [06:29<06:44, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1507/3000 [06:29<04:48, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1509/3000 [06:29<05:06, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1510/3000 [06:30<07:26, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1511/3000 [06:30<07:09, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1514/3000 [06:31<04:44, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1516/3000 [06:31<03:55, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1517/3000 [06:31<04:10, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1520/3000 [06:32<04:45, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1521/3000 [06:32<04:14, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1523/3000 [06:33<06:07, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1524/3000 [06:33<07:25, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1525/3000 [06:34<08:59, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1528/3000 [06:34<05:47, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1530/3000 [06:34<06:01, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1533/3000 [06:36<06:58, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1535/3000 [06:36<06:56, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1537/3000 [06:37<05:18, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1538/3000 [06:37<05:31, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1542/3000 [06:37<03:31, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1545/3000 [06:39<07:03, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1547/3000 [06:39<04:54, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1549/3000 [06:40<07:20, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1552/3000 [06:41<06:43, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1555/3000 [06:41<05:23, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1557/3000 [06:42<04:38, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1559/3000 [06:42<04:06, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1562/3000 [06:42<04:29, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1563/3000 [06:43<05:25, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1565/3000 [06:43<05:33, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1567/3000 [06:44<07:24, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1569/3000 [06:45<06:42, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1573/3000 [06:45<04:57, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1574/3000 [06:46<05:09, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1578/3000 [06:46<03:51, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1580/3000 [06:46<03:58, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1582/3000 [06:47<03:39, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1584/3000 [06:47<05:33, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1585/3000 [06:48<09:16, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1590/3000 [06:49<04:13, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1592/3000 [06:49<04:32, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1594/3000 [06:50<05:21, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1597/3000 [06:50<04:18, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [06:51<03:32, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [06:51<04:51, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1603/3000 [06:52<05:58, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1605/3000 [06:52<05:23, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1607/3000 [06:53<05:38, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1609/3000 [06:53<05:03, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1610/3000 [06:54<06:13, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1612/3000 [06:54<07:05, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1613/3000 [06:55<06:55, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1615/3000 [06:55<06:12, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1618/3000 [06:55<04:38, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1622/3000 [06:56<04:11, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1624/3000 [06:56<03:36, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1625/3000 [06:57<05:43, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1627/3000 [06:58<06:06, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1628/3000 [06:58<05:48, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1631/3000 [06:58<04:02, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1634/3000 [06:59<03:08, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1635/3000 [06:59<06:01, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1636/3000 [07:00<07:11, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [07:00<06:16, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1642/3000 [07:01<04:51, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1643/3000 [07:01<05:39, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1645/3000 [07:02<05:12, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1647/3000 [07:02<04:14, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1650/3000 [07:03<03:30, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1651/3000 [07:03<03:58, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1652/3000 [07:03<05:50, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1653/3000 [07:04<06:20, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1655/3000 [07:04<06:19, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1656/3000 [07:05<07:04, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1658/3000 [07:05<05:51, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1659/3000 [07:05<06:37, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1661/3000 [07:06<06:02, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1662/3000 [07:06<07:38, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1666/3000 [07:07<04:23, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1668/3000 [07:07<03:14, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1671/3000 [07:08<03:37, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1673/3000 [07:08<03:29, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1674/3000 [07:08<04:22, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1676/3000 [07:09<05:28, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1678/3000 [07:10<05:37, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [07:10<06:53, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1681/3000 [07:11<06:07, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1684/3000 [07:11<04:14, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1685/3000 [07:11<03:52, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1686/3000 [07:12<05:23, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1687/3000 [07:12<06:33, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1689/3000 [07:12<05:10, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1691/3000 [07:13<05:11, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1695/3000 [07:13<03:35, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1697/3000 [07:14<03:50, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1698/3000 [07:14<04:34, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1700/3000 [07:15<06:41, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1701/3000 [07:15<06:43, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1703/3000 [07:16<06:03, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [07:16<06:03, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1706/3000 [07:16<04:44, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1708/3000 [07:17<04:03, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1712/3000 [07:17<03:07, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1715/3000 [07:18<04:04, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1717/3000 [07:18<03:46, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1719/3000 [07:19<04:53, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1720/3000 [07:20<07:56, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1721/3000 [07:20<08:34, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▊ | 1725/3000 [07:21<04:14, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1728/3000 [07:21<03:43, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1730/3000 [07:21<03:01, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1732/3000 [07:22<03:15, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1735/3000 [07:22<03:08, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1737/3000 [07:23<03:53, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1738/3000 [07:23<03:46, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1740/3000 [07:24<07:40, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1742/3000 [07:24<05:32, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1743/3000 [07:25<08:29, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1745/3000 [07:26<06:33, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1748/3000 [07:26<03:34, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1750/3000 [07:26<03:08, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1753/3000 [07:26<02:47, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1755/3000 [07:28<06:16, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1757/3000 [07:28<05:07, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1758/3000 [07:28<05:49, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1760/3000 [07:29<06:02, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1761/3000 [07:29<06:31, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1764/3000 [07:30<04:43, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1765/3000 [07:31<07:24, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1768/3000 [07:31<04:37, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1773/3000 [07:31<02:12, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1775/3000 [07:32<05:32, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1777/3000 [07:33<06:12, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1779/3000 [07:34<05:22, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1780/3000 [07:34<04:41, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1782/3000 [07:34<04:57, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1786/3000 [07:35<03:57, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1790/3000 [07:35<02:15, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1792/3000 [07:36<04:35, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1794/3000 [07:37<04:28, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1796/3000 [07:37<03:59, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1797/3000 [07:38<06:28, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1798/3000 [07:38<06:42, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1802/3000 [07:39<03:49, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1805/3000 [07:39<03:14, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1806/3000 [07:39<03:59, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1809/3000 [07:40<03:27, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1810/3000 [07:40<05:14, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1812/3000 [07:41<05:25, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [07:42<06:31, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1814/3000 [07:42<06:05, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1817/3000 [07:42<03:54, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1820/3000 [07:43<04:07, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1821/3000 [07:43<04:26, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1824/3000 [07:44<03:28, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1825/3000 [07:44<03:55, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1826/3000 [07:45<06:28, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1827/3000 [07:45<06:26, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1831/3000 [07:45<03:51, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1832/3000 [07:46<04:43, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1833/3000 [07:46<04:43, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1835/3000 [07:46<03:37, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1836/3000 [07:47<05:01, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1839/3000 [07:47<03:35, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [07:48<03:45, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1843/3000 [07:48<03:32, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1844/3000 [07:49<07:57, 2.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1846/3000 [07:50<06:48, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1847/3000 [07:50<07:12, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1849/3000 [07:51<05:30, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1850/3000 [07:51<04:35, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1854/3000 [07:51<02:33, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [07:51<02:09, 8.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1858/3000 [07:52<05:12, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1860/3000 [07:53<04:12, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1863/3000 [07:54<04:50, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1867/3000 [07:54<03:54, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1869/3000 [07:54<03:32, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1870/3000 [07:55<04:02, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1871/3000 [07:55<04:21, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1873/3000 [07:56<05:30, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1874/3000 [07:56<05:26, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▎ | 1875/3000 [07:56<05:29, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1876/3000 [07:57<05:04, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1878/3000 [07:57<05:08, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1881/3000 [07:58<04:13, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1884/3000 [07:59<04:17, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1887/3000 [07:59<02:36, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1889/3000 [07:59<03:26, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1890/3000 [08:00<04:09, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1891/3000 [08:00<04:54, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1894/3000 [08:01<04:12, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1896/3000 [08:01<03:18, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1897/3000 [08:01<03:48, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [08:02<04:48, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1899/3000 [08:02<06:17, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1901/3000 [08:03<04:37, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1902/3000 [08:03<04:42, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1903/3000 [08:03<05:49, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1904/3000 [08:04<05:38, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1905/3000 [08:04<05:42, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1906/3000 [08:04<06:17, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1907/3000 [08:05<06:31, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1909/3000 [08:05<05:21, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1911/3000 [08:06<04:52, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1912/3000 [08:06<05:03, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1913/3000 [08:06<05:19, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [08:07<05:47, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1915/3000 [08:07<05:21, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1918/3000 [08:08<04:44, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1919/3000 [08:08<04:32, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1920/3000 [08:09<06:16, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1921/3000 [08:09<06:39, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1922/3000 [08:09<06:18, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1923/3000 [08:10<06:22, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1926/3000 [08:10<04:57, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1929/3000 [08:11<02:53, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1931/3000 [08:11<02:56, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1932/3000 [08:11<03:18, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1935/3000 [08:12<03:43, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1938/3000 [08:12<02:26, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1940/3000 [08:13<02:26, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1942/3000 [08:13<03:54, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1943/3000 [08:14<06:26, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1945/3000 [08:15<04:58, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [08:15<04:52, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1947/3000 [08:15<04:30, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1948/3000 [08:15<04:46, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1951/3000 [08:16<03:42, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1955/3000 [08:17<03:44, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1956/3000 [08:17<05:03, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1957/3000 [08:18<05:05, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1959/3000 [08:18<04:37, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1962/3000 [08:19<03:08, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1963/3000 [08:19<03:05, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1964/3000 [08:19<04:30, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1966/3000 [08:20<03:35, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1967/3000 [08:20<03:43, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1969/3000 [08:20<03:57, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1971/3000 [08:21<03:23, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1972/3000 [08:21<03:17, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1973/3000 [08:21<04:14, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1975/3000 [08:22<04:33, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1979/3000 [08:23<03:36, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1980/3000 [08:23<05:07, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1982/3000 [08:24<04:18, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1984/3000 [08:24<04:35, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1987/3000 [08:25<03:35, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1990/3000 [08:25<02:35, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1992/3000 [08:26<03:58, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1994/3000 [08:26<03:29, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1996/3000 [08:27<05:39, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1999/3000 [08:28<04:07, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2003/3000 [08:28<03:19, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2005/3000 [08:29<04:27, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2008/3000 [08:30<03:04, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2010/3000 [08:30<02:48, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2011/3000 [08:31<04:27, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2012/3000 [08:31<04:29, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2014/3000 [08:31<04:01, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2015/3000 [08:32<04:09, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2020/3000 [08:33<03:52, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2021/3000 [08:33<04:05, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [08:33<03:59, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2023/3000 [08:34<04:52, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2025/3000 [08:34<04:26, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2028/3000 [08:35<02:57, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2031/3000 [08:35<02:48, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2033/3000 [08:36<03:59, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2034/3000 [08:36<04:33, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2035/3000 [08:37<05:15, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2036/3000 [08:37<05:08, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2037/3000 [08:37<04:48, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [08:38<03:41, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2041/3000 [08:38<04:02, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2043/3000 [08:39<03:52, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2045/3000 [08:39<03:28, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2046/3000 [08:39<03:57, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2047/3000 [08:40<04:27, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2049/3000 [08:41<05:33, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2051/3000 [08:41<04:37, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2053/3000 [08:41<03:25, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2056/3000 [08:42<02:32, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2059/3000 [08:43<03:22, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2060/3000 [08:43<03:01, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2061/3000 [08:43<03:32, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2062/3000 [08:43<03:45, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2064/3000 [08:44<03:13, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2068/3000 [08:45<03:24, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2070/3000 [08:45<02:38, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2071/3000 [08:45<03:22, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2072/3000 [08:46<03:19, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2074/3000 [08:47<04:55, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2076/3000 [08:47<04:26, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2078/3000 [08:48<04:04, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2080/3000 [08:48<05:15, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2082/3000 [08:49<04:32, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2085/3000 [08:49<03:37, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2086/3000 [08:50<04:11, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2088/3000 [08:50<03:23, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2089/3000 [08:50<03:28, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2092/3000 [08:51<03:26, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2095/3000 [08:51<02:36, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2097/3000 [08:52<02:45, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2099/3000 [08:53<04:22, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2101/3000 [08:53<03:58, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2103/3000 [08:54<03:08, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2106/3000 [08:54<03:18, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2108/3000 [08:55<03:02, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2112/3000 [08:55<01:30, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2114/3000 [08:56<03:39, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2116/3000 [08:57<03:21, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2117/3000 [08:57<05:07, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2119/3000 [08:58<03:59, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2122/3000 [08:58<03:01, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2124/3000 [08:58<02:33, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2125/3000 [08:59<02:55, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2127/3000 [09:00<04:24, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2129/3000 [09:00<04:00, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2130/3000 [09:00<03:25, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2131/3000 [09:01<04:27, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2133/3000 [09:01<03:28, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2135/3000 [09:02<03:34, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2137/3000 [09:03<05:30, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2139/3000 [09:03<04:15, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2142/3000 [09:04<03:03, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2144/3000 [09:04<03:17, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2146/3000 [09:05<02:53, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2147/3000 [09:05<02:57, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2148/3000 [09:05<03:12, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2150/3000 [09:06<03:16, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2151/3000 [09:06<04:18, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2154/3000 [09:07<02:51, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2155/3000 [09:07<03:10, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2157/3000 [09:07<03:26, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2160/3000 [09:08<02:15, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2161/3000 [09:08<02:50, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2163/3000 [09:10<05:57, 2.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2167/3000 [09:10<02:43, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2169/3000 [09:10<02:49, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2171/3000 [09:11<02:41, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2173/3000 [09:11<02:07, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2174/3000 [09:11<02:13, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2176/3000 [09:12<03:49, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2177/3000 [09:12<03:12, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2178/3000 [09:13<03:47, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2180/3000 [09:13<03:26, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2181/3000 [09:14<05:15, 2.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2182/3000 [09:15<06:50, 1.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2184/3000 [09:15<04:39, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2188/3000 [09:16<03:01, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2189/3000 [09:16<02:57, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2191/3000 [09:16<03:07, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2193/3000 [09:17<02:59, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2194/3000 [09:17<03:08, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2197/3000 [09:17<02:14, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [09:18<02:34, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2200/3000 [09:19<04:21, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [09:19<04:31, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2205/3000 [09:20<03:11, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2206/3000 [09:20<03:29, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2208/3000 [09:21<02:50, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2210/3000 [09:21<03:38, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2211/3000 [09:22<04:08, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2213/3000 [09:22<03:34, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2214/3000 [09:22<03:11, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2216/3000 [09:23<03:53, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2217/3000 [09:23<03:10, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2220/3000 [09:24<02:15, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2223/3000 [09:24<02:10, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [09:24<02:27, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2226/3000 [09:25<02:11, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2227/3000 [09:26<04:20, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2229/3000 [09:26<03:23, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2230/3000 [09:26<03:31, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2231/3000 [09:27<04:41, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2232/3000 [09:27<04:40, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2234/3000 [09:28<04:18, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2237/3000 [09:29<03:05, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2239/3000 [09:29<02:08, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2242/3000 [09:29<02:25, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2244/3000 [09:30<02:27, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2246/3000 [09:31<03:10, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2247/3000 [09:31<02:40, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2248/3000 [09:31<03:18, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2249/3000 [09:31<03:31, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2252/3000 [09:32<02:30, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2253/3000 [09:33<05:01, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2255/3000 [09:34<05:02, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2256/3000 [09:34<04:14, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2259/3000 [09:34<02:30, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [09:34<02:43, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2262/3000 [09:35<03:09, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2264/3000 [09:35<02:51, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2265/3000 [09:36<03:29, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2267/3000 [09:36<03:01, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2268/3000 [09:37<03:19, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2269/3000 [09:37<03:30, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2271/3000 [09:37<03:03, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2272/3000 [09:38<02:48, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2275/3000 [09:38<02:13, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2277/3000 [09:39<02:31, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2278/3000 [09:39<03:22, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2282/3000 [09:40<02:42, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2283/3000 [09:40<03:20, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2284/3000 [09:41<04:28, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [09:42<05:27, 2.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2287/3000 [09:42<04:06, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2288/3000 [09:43<03:21, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2289/3000 [09:43<03:50, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2292/3000 [09:43<02:19, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2294/3000 [09:44<02:08, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2295/3000 [09:44<01:53, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [09:44<03:02, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2298/3000 [09:45<03:51, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2299/3000 [09:46<04:08, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2300/3000 [09:46<05:11, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2301/3000 [09:47<04:59, 2.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2303/3000 [09:47<03:47, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2305/3000 [09:47<03:03, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2306/3000 [09:48<02:55, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2309/3000 [09:48<02:53, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2312/3000 [09:49<01:53, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [09:49<03:15, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2314/3000 [09:50<03:02, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2316/3000 [09:50<02:59, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2317/3000 [09:50<02:34, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2320/3000 [09:51<02:23, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2323/3000 [09:52<02:38, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2324/3000 [09:52<03:08, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2325/3000 [09:53<02:58, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2326/3000 [09:53<02:54, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2328/3000 [09:53<02:50, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2331/3000 [09:54<01:51, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2333/3000 [09:54<02:35, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2334/3000 [09:55<03:27, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2335/3000 [09:56<05:31, 2.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2338/3000 [09:56<03:17, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2340/3000 [09:56<02:37, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2343/3000 [09:57<02:02, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2344/3000 [09:57<02:11, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2346/3000 [09:57<02:11, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2347/3000 [09:58<02:03, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2350/3000 [09:59<02:33, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2351/3000 [09:59<03:32, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2353/3000 [10:00<03:00, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2354/3000 [10:00<04:32, 2.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2355/3000 [10:01<03:52, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2357/3000 [10:02<04:15, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2359/3000 [10:02<03:15, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2361/3000 [10:02<02:35, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2364/3000 [10:03<02:04, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2365/3000 [10:03<01:59, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2366/3000 [10:03<02:15, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2367/3000 [10:03<02:25, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2370/3000 [10:04<02:24, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2372/3000 [10:05<02:19, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2373/3000 [10:05<02:29, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2374/3000 [10:05<03:35, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2375/3000 [10:06<03:20, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2377/3000 [10:06<02:32, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2378/3000 [10:07<03:49, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2379/3000 [10:07<04:37, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [10:08<02:52, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2383/3000 [10:08<02:55, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2385/3000 [10:08<02:13, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2386/3000 [10:09<03:50, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2388/3000 [10:10<03:08, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2389/3000 [10:10<02:47, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2391/3000 [10:10<02:25, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2393/3000 [10:11<02:28, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2394/3000 [10:11<02:09, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2396/3000 [10:11<01:59, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2397/3000 [10:12<02:05, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2399/3000 [10:12<02:09, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2400/3000 [10:13<02:40, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2401/3000 [10:13<04:10, 2.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2403/3000 [10:14<03:10, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2405/3000 [10:14<02:21, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2406/3000 [10:15<03:54, 2.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2409/3000 [10:16<03:06, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2410/3000 [10:16<03:14, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2413/3000 [10:17<02:03, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2414/3000 [10:17<02:38, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2415/3000 [10:17<02:45, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [10:18<01:48, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2419/3000 [10:18<02:06, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2421/3000 [10:18<01:50, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [10:19<02:55, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2423/3000 [10:19<02:55, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2426/3000 [10:20<02:03, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2427/3000 [10:20<02:23, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2428/3000 [10:21<02:37, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2431/3000 [10:22<03:12, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2432/3000 [10:22<03:25, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2433/3000 [10:23<03:02, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2434/3000 [10:23<02:49, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2435/3000 [10:23<02:55, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2436/3000 [10:23<02:44, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2437/3000 [10:24<03:12, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2441/3000 [10:24<01:39, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2442/3000 [10:25<01:43, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2444/3000 [10:25<01:27, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2445/3000 [10:25<01:34, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2447/3000 [10:25<01:48, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2448/3000 [10:26<02:01, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2449/3000 [10:27<03:10, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2452/3000 [10:27<02:21, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2453/3000 [10:28<02:41, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2454/3000 [10:28<02:48, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2456/3000 [10:28<02:13, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2458/3000 [10:29<02:08, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2459/3000 [10:29<02:19, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2461/3000 [10:29<02:10, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2463/3000 [10:30<01:53, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2464/3000 [10:30<02:15, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2465/3000 [10:31<03:00, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2466/3000 [10:31<03:26, 2.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2467/3000 [10:32<03:42, 2.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2468/3000 [10:32<03:47, 2.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2470/3000 [10:33<02:46, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2471/3000 [10:33<02:41, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [10:33<02:12, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2474/3000 [10:34<03:16, 2.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2476/3000 [10:34<02:43, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2479/3000 [10:35<01:46, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2482/3000 [10:35<01:27, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2483/3000 [10:35<01:40, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2484/3000 [10:36<03:19, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2485/3000 [10:37<02:56, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2486/3000 [10:37<03:03, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [10:38<02:47, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2491/3000 [10:38<01:46, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2494/3000 [10:39<01:39, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2495/3000 [10:39<02:05, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2496/3000 [10:39<02:17, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2497/3000 [10:40<03:13, 2.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2499/3000 [10:41<02:57, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2501/3000 [10:41<01:49, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2503/3000 [10:41<01:39, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2504/3000 [10:41<01:45, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2507/3000 [10:42<01:10, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2508/3000 [10:42<02:07, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2509/3000 [10:43<02:31, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2511/3000 [10:44<02:22, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2512/3000 [10:44<02:05, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2513/3000 [10:44<02:32, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2516/3000 [10:44<01:37, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [10:45<01:37, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2518/3000 [10:45<01:41, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2519/3000 [10:45<02:14, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2521/3000 [10:46<01:57, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2524/3000 [10:46<01:14, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [10:47<02:25, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2526/3000 [10:47<02:41, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2527/3000 [10:48<02:47, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2529/3000 [10:48<02:12, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2530/3000 [10:48<01:57, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2532/3000 [10:49<01:26, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2533/3000 [10:49<01:38, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2534/3000 [10:50<02:41, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2535/3000 [10:50<03:11, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2537/3000 [10:51<02:28, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2539/3000 [10:51<01:37, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2540/3000 [10:51<01:32, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2542/3000 [10:51<01:18, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2543/3000 [10:52<02:13, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2545/3000 [10:52<01:44, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2547/3000 [10:52<01:25, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2548/3000 [10:53<01:46, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2550/3000 [10:53<01:49, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2552/3000 [10:54<01:39, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2554/3000 [10:55<02:34, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2556/3000 [10:55<01:43, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2557/3000 [10:56<02:08, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2559/3000 [10:56<02:11, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2560/3000 [10:57<02:15, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2561/3000 [10:57<02:02, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2563/3000 [10:57<02:08, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2565/3000 [10:58<02:01, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2568/3000 [10:58<01:11, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [10:58<00:59, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2571/3000 [10:59<02:01, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2573/3000 [11:00<01:41, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2574/3000 [11:00<02:37, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2579/3000 [11:01<01:17, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2581/3000 [11:02<01:39, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2582/3000 [11:02<02:18, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2583/3000 [11:03<02:16, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2584/3000 [11:03<02:08, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2586/3000 [11:04<01:58, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2587/3000 [11:04<01:54, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2591/3000 [11:04<01:20, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2593/3000 [11:05<01:02, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2594/3000 [11:05<01:14, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2595/3000 [11:05<01:29, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2598/3000 [11:05<01:04, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2600/3000 [11:06<01:10, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2601/3000 [11:07<02:12, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2602/3000 [11:07<02:43, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2603/3000 [11:08<02:22, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2605/3000 [11:08<01:42, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2607/3000 [11:08<01:41, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2608/3000 [11:09<01:43, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2609/3000 [11:09<01:39, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2611/3000 [11:09<01:15, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2613/3000 [11:10<01:23, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2614/3000 [11:10<01:52, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2615/3000 [11:10<01:45, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2617/3000 [11:11<01:20, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2618/3000 [11:11<02:06, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2620/3000 [11:12<01:41, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2623/3000 [11:12<01:18, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2625/3000 [11:13<01:12, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2626/3000 [11:13<01:12, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2629/3000 [11:13<01:13, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2631/3000 [11:14<01:40, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2633/3000 [11:15<01:32, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2635/3000 [11:15<01:51, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2637/3000 [11:16<01:20, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2638/3000 [11:16<01:24, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2642/3000 [11:16<00:56, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2643/3000 [11:17<01:25, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2646/3000 [11:18<01:25, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2648/3000 [11:18<01:16, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2649/3000 [11:18<01:26, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2650/3000 [11:19<01:40, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2653/3000 [11:20<01:55, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2656/3000 [11:20<01:13, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2657/3000 [11:21<01:55, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [11:21<01:43, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2662/3000 [11:22<01:13, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2664/3000 [11:22<00:57, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2667/3000 [11:23<01:16, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2669/3000 [11:24<01:15, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2670/3000 [11:25<02:24, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2671/3000 [11:25<02:17, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2675/3000 [11:26<01:11, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2676/3000 [11:26<01:37, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2677/3000 [11:27<02:03, 2.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2680/3000 [11:28<01:27, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2682/3000 [11:28<01:18, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2683/3000 [11:28<01:19, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2685/3000 [11:29<01:27, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2686/3000 [11:29<01:16, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2688/3000 [11:30<01:18, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2689/3000 [11:30<01:30, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2693/3000 [11:30<00:52, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2695/3000 [11:31<00:48, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2697/3000 [11:31<00:55, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2698/3000 [11:32<01:15, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2699/3000 [11:32<01:24, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2702/3000 [11:33<01:29, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2704/3000 [11:33<00:59, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2705/3000 [11:34<01:26, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2707/3000 [11:34<01:24, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2709/3000 [11:35<01:15, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2710/3000 [11:35<01:13, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2711/3000 [11:35<01:14, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2712/3000 [11:35<01:16, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2714/3000 [11:36<01:25, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2717/3000 [11:37<01:11, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2718/3000 [11:37<01:10, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2719/3000 [11:37<01:11, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2720/3000 [11:38<01:12, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2723/3000 [11:38<00:49, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2724/3000 [11:38<01:03, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2727/3000 [11:39<00:47, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2728/3000 [11:39<00:54, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2730/3000 [11:39<00:54, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2731/3000 [11:41<01:45, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2734/3000 [11:41<01:08, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2735/3000 [11:41<01:19, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [11:42<01:26, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2739/3000 [11:42<01:01, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2742/3000 [11:43<00:51, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2744/3000 [11:43<00:54, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2747/3000 [11:44<00:50, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2748/3000 [11:45<01:12, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2749/3000 [11:45<01:32, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2750/3000 [11:46<01:50, 2.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2753/3000 [11:46<01:03, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2754/3000 [11:46<01:03, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2756/3000 [11:47<00:55, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2758/3000 [11:47<00:55, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2760/3000 [11:47<00:41, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2761/3000 [11:48<01:00, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2765/3000 [11:48<00:36, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2767/3000 [11:49<00:47, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2768/3000 [11:49<00:54, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2769/3000 [11:50<00:52, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2772/3000 [11:50<00:41, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2774/3000 [11:50<00:36, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▎| 2775/3000 [11:51<00:40, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2777/3000 [11:51<00:48, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2780/3000 [11:52<00:46, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [11:52<00:41, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2782/3000 [11:53<01:16, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [11:53<01:04, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2788/3000 [11:54<00:39, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2789/3000 [11:54<00:38, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2790/3000 [11:54<00:48, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2793/3000 [11:55<00:37, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2795/3000 [11:55<00:35, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2797/3000 [11:56<01:02, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2800/3000 [11:57<00:45, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2801/3000 [11:57<00:45, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2803/3000 [11:58<00:52, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2806/3000 [11:58<00:31, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2808/3000 [11:59<00:44, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2811/3000 [11:59<00:35, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2812/3000 [12:00<00:59, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2813/3000 [12:01<01:22, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2815/3000 [12:01<01:06, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2817/3000 [12:01<00:42, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2818/3000 [12:01<00:39, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2821/3000 [12:02<00:42, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2822/3000 [12:03<00:59, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2825/3000 [12:04<00:41, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2827/3000 [12:04<00:31, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2828/3000 [12:04<00:36, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2829/3000 [12:05<00:46, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2831/3000 [12:05<00:35, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2835/3000 [12:06<00:30, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2837/3000 [12:06<00:31, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2838/3000 [12:07<00:47, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [12:07<00:50, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2841/3000 [12:08<00:49, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2843/3000 [12:08<00:35, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [12:09<00:48, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2847/3000 [12:09<00:34, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2848/3000 [12:09<00:39, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2849/3000 [12:10<01:00, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2851/3000 [12:10<00:42, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2852/3000 [12:11<00:50, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2853/3000 [12:11<00:48, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2855/3000 [12:12<00:42, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2856/3000 [12:12<00:53, 2.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2857/3000 [12:13<00:47, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2859/3000 [12:13<00:37, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2864/3000 [12:13<00:17, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2866/3000 [12:14<00:23, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2867/3000 [12:15<00:35, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2869/3000 [12:15<00:30, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2871/3000 [12:16<00:34, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2872/3000 [12:16<00:31, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2873/3000 [12:16<00:37, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2874/3000 [12:17<00:45, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2876/3000 [12:17<00:38, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2877/3000 [12:18<01:04, 1.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2882/3000 [12:19<00:24, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2883/3000 [12:19<00:25, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2884/3000 [12:20<00:40, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2888/3000 [12:20<00:22, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2892/3000 [12:21<00:15, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2894/3000 [12:21<00:18, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2896/3000 [12:22<00:17, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2897/3000 [12:22<00:21, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2898/3000 [12:22<00:25, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2899/3000 [12:23<00:34, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2900/3000 [12:23<00:33, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2901/3000 [12:24<00:47, 2.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2902/3000 [12:24<00:40, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2903/3000 [12:25<00:35, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2910/3000 [12:25<00:11, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2912/3000 [12:26<00:11, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2914/3000 [12:27<00:22, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2915/3000 [12:27<00:27, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2916/3000 [12:28<00:29, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2917/3000 [12:28<00:27, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2918/3000 [12:28<00:28, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2919/3000 [12:29<00:26, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2921/3000 [12:29<00:18, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2925/3000 [12:30<00:15, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2927/3000 [12:30<00:13, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2928/3000 [12:30<00:15, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2931/3000 [12:31<00:12, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2932/3000 [12:32<00:24, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2933/3000 [12:32<00:26, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2934/3000 [12:33<00:24, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2936/3000 [12:33<00:17, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2940/3000 [12:34<00:12, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2941/3000 [12:34<00:13, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2944/3000 [12:35<00:12, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2945/3000 [12:35<00:12, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2947/3000 [12:36<00:15, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2950/3000 [12:36<00:11, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2951/3000 [12:37<00:13, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [12:37<00:15, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [12:38<00:14, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2954/3000 [12:38<00:15, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2955/3000 [12:38<00:13, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2956/3000 [12:38<00:13, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2958/3000 [12:39<00:10, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2959/3000 [12:39<00:09, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2963/3000 [12:39<00:05, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2966/3000 [12:41<00:08, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2968/3000 [12:41<00:07, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2971/3000 [12:42<00:07, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2972/3000 [12:42<00:06, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2975/3000 [12:43<00:04, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2977/3000 [12:43<00:03, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2980/3000 [12:43<00:03, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2981/3000 [12:44<00:03, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2982/3000 [12:45<00:07, 2.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2983/3000 [12:45<00:06, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2984/3000 [12:45<00:05, 2.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2986/3000 [12:46<00:03, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2987/3000 [12:46<00:04, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2988/3000 [12:47<00:03, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2990/3000 [12:47<00:02, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [12:47<00:02, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2992/3000 [12:47<00:02, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2993/3000 [12:48<00:02, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2995/3000 [12:48<00:01, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2996/3000 [12:48<00:01, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2998/3000 [12:49<00:00, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [12:49<00:00, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-09 21:53:36.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.0, 'em': 0.0, 'acc': 0.462}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")  # announce the start of the test-split run\n",
"with suppress_logger_info():  # presumably mutes INFO-level log records during evaluation -- confirm in evoagentx.core.callbacks\n",
"    results = textgrad_optimizer.evaluate(eval_mode=\"test\", dataset=benchmark)\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")  # `results` is also displayed by the next cell"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b38a4ea0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.0, 'em': 0.0, 'acc': 0.462}"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "78396e99",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"\n",
"# Seed NumPy's global RNG so NumPy-based sampling done after this point is repeatable.\n",
"np.random.seed(2024)\n",
"\n",
"# Generated workflow: a single task that takes `question` + `context` and returns `answer`,\n",
"# with the task output parsed in \"xml\" mode.\n",
"qa_workflow = {\n",
"    \"goal\": \"Provide a concise answer to the question using relevant context. The answer must be straightforward and avoid unnecessary explanations.\",\n",
"    \"tasks\": [\n",
"        {\n",
"            \"name\": \"generate_answer\",\n",
"            \"description\": \"Extract and formulate an answer from the given context.\",\n",
"            \"inputs\": [\n",
"                {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question that needs to be answered.\"},\n",
"                {\"name\": \"context\", \"type\": \"str\", \"required\": True, \"description\": \"The background information pertinent to the question.\"}\n",
"            ],\n",
"            \"outputs\": [\n",
"                {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n",
"            ],\n",
"            \"prompt_template\": StringTemplate(instruction=\"Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.\"),\n",
"            \"parse_mode\": \"xml\"\n",
"        }\n",
"    ]\n",
"}\n",
"\n",
"# Read the OpenAI key from the environment (optionally populated from a local .env file).\n",
"# Never hardcode the key here, and never write a placeholder back into os.environ: doing so\n",
"# would replace a real key that is already exported with an empty string.\n",
"load_dotenv()\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
"if not OPENAI_API_KEY:\n",
"    # Fail before any LLM object is built instead of part-way through a 3000-example run.\n",
"    raise RuntimeError(\"OPENAI_API_KEY is not set; export it or add it to a .env file before running this cell.\")\n",
"api_key = OPENAI_API_KEY  # kept so other cells that reference `api_key` keep working\n",
"\n",
"# One shared config; the executor answers questions and the optimizer rewrites prompts.\n",
"llm_config = OpenAILLMConfig(\n",
"    model=\"gpt-4o-mini-2024-07-18\",\n",
"    openai_key=OPENAI_API_KEY,\n",
"    top_p=0.85,\n",
"    temperature=0.2,\n",
"    frequency_penalty=0.0,\n",
"    presence_penalty=0.0,\n",
")\n",
"llm = OpenAILLM(config=llm_config)\n",
"executor_llm = OpenAILLM(config=llm_config)\n",
"optimizer_llm = OpenAILLM(config=llm_config)\n",
"\n",
"# `HotPotQASplits` and `collate_func` are defined in earlier cells of this notebook.\n",
"benchmark = HotPotQASplits()\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(qa_workflow)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
"    llm=executor_llm,\n",
"    agent_manager=agent_manager,\n",
"    collate_func=collate_func,\n",
"    num_workers=20,\n",
"    verbose=True,\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
"    graph=workflow_graph,\n",
"    optimize_mode=\"all\",\n",
"    executor_llm=executor_llm,\n",
"    optimizer_llm=optimizer_llm,\n",
"    batch_size=3,\n",
"    max_steps=20,\n",
"    evaluator=evaluator,\n",
"    eval_every_n_steps=1,\n",
"    eval_rounds=1,\n",
"    save_interval=None,\n",
"    save_path=\"./\",\n",
"    rollback=True,\n",
"    constraints=[],\n",
")\n",
"\n",
"# The size was previously computed and silently discarded; log it so it is actually visible.\n",
"logger.info(f\"Full dataset size: {len(benchmark._fulldata)}\")\n",
"\n",
"# Baseline score of the un-optimized workflow on the test split.\n",
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
"    results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"# `results` intentionally keeps the metrics returned by evaluate(); do not rebind it to sampled data.\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fccdf782",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"id": "8ae2db95",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"# Use one fixed, seeded subset of the training split for both the train and dev splits.\n",
"SUBSET_SIZE = 50\n",
"np.random.seed(2024)\n",
"train_pool = benchmark._train_data\n",
"# Sample positions instead of the records themselves: np.random.choice over the records\n",
"# converts them with np.array and returns a NumPy object array, whereas indexing keeps the\n",
"# split a plain list of the original record objects.\n",
"subset_idx = np.random.choice(len(train_pool), size=SUBSET_SIZE, replace=False)\n",
"out = [train_pool[i] for i in subset_idx]\n",
"# NOTE(review): this rebinds private benchmark attributes in place, so re-running the cell\n",
"# re-samples from the already reduced subset; re-create `benchmark` first for a clean re-run.\n",
"benchmark._train_data = out\n",
"benchmark._dev_data = out"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "da843e68",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 13,
"id": "d686ee20",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 15:57:22.453\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n",
"Evaluating workflow: 0%| | 1/3000 [00:04<3:58:20, 4.77s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 5/3000 [00:05<36:51, 1.35it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 9/3000 [00:06<16:58, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 11/3000 [00:06<12:44, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 13/3000 [00:07<13:58, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 16/3000 [00:07<11:59, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 17/3000 [00:08<12:20, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 18/3000 [00:08<11:54, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 20/3000 [00:09<14:31, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 21/3000 [00:10<22:20, 2.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 26/3000 [00:10<10:25, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 27/3000 [00:10<10:04, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 28/3000 [00:11<15:55, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 30/3000 [00:11<13:41, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 31/3000 [00:12<12:40, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 34/3000 [00:13<13:06, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 37/3000 [00:13<11:16, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 39/3000 [00:14<10:12, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 41/3000 [00:14<09:55, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 42/3000 [00:14<14:01, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 44/3000 [00:15<14:05, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 46/3000 [00:15<11:06, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 49/3000 [00:16<11:17, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 51/3000 [00:16<09:19, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 53/3000 [00:17<09:35, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 54/3000 [00:18<17:02, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 55/3000 [00:18<21:33, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 56/3000 [00:19<20:23, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 57/3000 [00:19<18:10, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 60/3000 [00:20<11:19, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 62/3000 [00:20<07:55, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 64/3000 [00:21<16:31, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 67/3000 [00:21<11:19, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 68/3000 [00:22<14:39, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 70/3000 [00:22<14:06, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 71/3000 [00:23<14:55, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 72/3000 [00:23<15:30, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 74/3000 [00:23<11:14, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 78/3000 [00:24<08:24, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 79/3000 [00:24<11:31, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 80/3000 [00:25<12:59, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 81/3000 [00:26<20:55, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 85/3000 [00:26<10:26, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 87/3000 [00:27<12:52, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 90/3000 [00:28<11:31, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 91/3000 [00:28<10:17, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 92/3000 [00:29<17:12, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 93/3000 [00:29<17:06, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 96/3000 [00:29<10:56, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 97/3000 [00:30<11:12, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 100/3000 [00:30<09:31, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 101/3000 [00:31<14:04, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 102/3000 [00:31<13:22, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 103/3000 [00:31<17:30, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:32<18:04, 2.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:33<15:01, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 109/3000 [00:34<17:21, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:34<14:01, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 112/3000 [00:34<13:19, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 114/3000 [00:35<12:37, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 118/3000 [00:36<10:40, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 119/3000 [00:36<13:18, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 123/3000 [00:37<08:59, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:37<08:11, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 125/3000 [00:38<17:18, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 126/3000 [00:39<22:22, 2.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 128/3000 [00:39<15:23, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 131/3000 [00:39<10:56, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 132/3000 [00:39<10:19, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:40<11:47, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 134/3000 [00:40<11:34, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 135/3000 [00:40<14:06, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 136/3000 [00:41<14:48, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 137/3000 [00:41<16:43, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 138/3000 [00:42<23:50, 2.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 141/3000 [00:43<14:45, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 143/3000 [00:43<14:00, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 144/3000 [00:44<17:34, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 147/3000 [00:44<10:40, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 148/3000 [00:45<13:17, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 152/3000 [00:45<08:32, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 155/3000 [00:46<09:13, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 156/3000 [00:46<12:22, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 158/3000 [00:47<12:07, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 159/3000 [00:48<20:04, 2.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 160/3000 [00:48<19:30, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 164/3000 [00:49<14:17, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 166/3000 [00:49<11:22, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 167/3000 [00:50<11:15, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 171/3000 [00:50<06:53, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 172/3000 [00:50<09:01, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 174/3000 [00:51<08:59, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 175/3000 [00:51<10:09, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 176/3000 [00:52<14:07, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 177/3000 [00:52<13:01, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 178/3000 [00:52<15:23, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 179/3000 [00:53<14:00, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 180/3000 [00:54<21:19, 2.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 181/3000 [00:54<23:40, 1.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 183/3000 [00:55<17:50, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:55<18:07, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 187/3000 [00:56<12:56, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 190/3000 [00:56<09:20, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 192/3000 [00:57<09:07, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 196/3000 [00:58<09:15, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 197/3000 [00:58<09:03, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 198/3000 [00:58<10:39, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 200/3000 [00:59<10:50, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 201/3000 [00:59<12:26, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 203/3000 [01:00<12:30, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 205/3000 [01:00<16:38, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 206/3000 [01:01<21:36, 2.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [01:01<14:57, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 211/3000 [01:02<13:15, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 214/3000 [01:03<11:43, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 216/3000 [01:03<08:36, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 218/3000 [01:03<07:14, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 219/3000 [01:04<10:05, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 222/3000 [01:05<10:58, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 223/3000 [01:05<10:47, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 225/3000 [01:05<11:59, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 226/3000 [01:06<10:43, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 227/3000 [01:06<15:37, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 228/3000 [01:06<15:01, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 229/3000 [01:07<18:50, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 233/3000 [01:08<09:13, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 235/3000 [01:08<10:58, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 237/3000 [01:08<08:17, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 239/3000 [01:09<09:41, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 244/3000 [01:09<06:37, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 246/3000 [01:10<09:52, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 247/3000 [01:10<09:52, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 248/3000 [01:11<09:51, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 249/3000 [01:11<12:18, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 250/3000 [01:12<17:20, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 251/3000 [01:13<21:37, 2.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 253/3000 [01:13<16:14, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 255/3000 [01:13<13:42, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 256/3000 [01:14<11:54, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 261/3000 [01:14<06:13, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 262/3000 [01:15<10:37, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 264/3000 [01:15<09:28, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 266/3000 [01:16<11:54, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 267/3000 [01:16<10:01, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 268/3000 [01:16<10:30, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 270/3000 [01:17<11:39, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 272/3000 [01:18<14:58, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 274/3000 [01:18<15:24, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [01:19<08:48, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 281/3000 [01:19<07:03, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 283/3000 [01:19<06:38, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 284/3000 [01:20<11:10, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 285/3000 [01:22<28:39, 1.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 288/3000 [01:22<18:12, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 289/3000 [01:23<21:29, 2.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 290/3000 [01:23<19:41, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 295/3000 [01:24<08:31, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 297/3000 [01:24<07:10, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 299/3000 [01:24<08:07, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 301/3000 [01:25<10:53, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 302/3000 [01:25<11:55, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 303/3000 [01:26<13:02, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 304/3000 [01:26<15:08, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 305/3000 [01:27<14:21, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [01:27<15:45, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 309/3000 [01:28<12:29, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 310/3000 [01:28<13:33, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 311/3000 [01:28<12:36, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [01:29<08:02, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 319/3000 [01:29<07:00, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [01:30<07:37, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 321/3000 [01:30<09:31, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 322/3000 [01:31<14:22, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 323/3000 [01:31<14:41, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 325/3000 [01:32<15:09, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 326/3000 [01:32<12:25, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 328/3000 [01:32<11:13, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 330/3000 [01:33<10:47, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 331/3000 [01:33<09:40, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 332/3000 [01:33<09:49, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 333/3000 [01:34<10:52, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 338/3000 [01:34<05:22, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [01:34<05:47, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 341/3000 [01:35<06:49, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 342/3000 [01:35<06:59, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 343/3000 [01:35<09:34, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 345/3000 [01:37<19:15, 2.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 346/3000 [01:37<18:43, 2.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 349/3000 [01:38<12:11, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 351/3000 [01:38<12:10, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 353/3000 [01:39<09:43, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 355/3000 [01:39<07:59, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 357/3000 [01:39<08:17, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 360/3000 [01:40<08:04, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 361/3000 [01:41<16:15, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 364/3000 [01:42<14:38, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 365/3000 [01:42<14:38, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 367/3000 [01:43<12:49, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 369/3000 [01:43<10:17, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 370/3000 [01:43<08:53, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 372/3000 [01:43<08:29, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [01:44<11:04, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 376/3000 [01:45<10:19, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 377/3000 [01:45<12:56, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 379/3000 [01:46<12:14, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 381/3000 [01:46<11:03, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [01:46<12:03, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 384/3000 [01:47<11:54, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 388/3000 [01:47<07:07, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 389/3000 [01:48<08:21, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 391/3000 [01:48<08:42, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 392/3000 [01:48<09:33, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 393/3000 [01:49<11:10, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 394/3000 [01:49<14:50, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 396/3000 [01:50<12:49, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 398/3000 [01:51<16:11, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 400/3000 [01:51<12:53, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:52<18:59, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 402/3000 [01:52<16:44, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 406/3000 [01:53<08:24, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:53<05:58, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 413/3000 [01:53<05:16, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 415/3000 [01:54<10:19, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 417/3000 [01:55<09:52, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 418/3000 [01:56<14:28, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 420/3000 [01:56<14:48, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 422/3000 [01:57<11:52, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 423/3000 [01:57<13:32, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 427/3000 [01:58<07:34, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 428/3000 [01:58<08:23, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 432/3000 [01:58<05:30, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 434/3000 [01:59<12:05, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 436/3000 [02:00<13:52, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 437/3000 [02:01<14:36, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 438/3000 [02:01<15:48, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 440/3000 [02:02<13:02, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 441/3000 [02:02<14:58, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 442/3000 [02:03<16:13, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 444/3000 [02:03<13:26, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 448/3000 [02:03<06:21, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 449/3000 [02:04<07:15, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 450/3000 [02:04<08:15, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 452/3000 [02:04<08:28, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 455/3000 [02:05<07:36, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 456/3000 [02:05<08:42, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 457/3000 [02:06<12:05, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 458/3000 [02:06<13:05, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 459/3000 [02:06<15:49, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 461/3000 [02:07<13:05, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 462/3000 [02:07<12:28, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 465/3000 [02:08<11:23, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 467/3000 [02:09<11:18, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 468/3000 [02:09<12:26, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 470/3000 [02:09<11:28, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 472/3000 [02:10<09:38, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 473/3000 [02:10<10:26, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 476/3000 [02:11<10:25, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 477/3000 [02:11<12:09, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 478/3000 [02:12<14:03, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 480/3000 [02:12<12:24, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 482/3000 [02:13<13:02, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 484/3000 [02:13<09:48, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [02:14<16:01, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 486/3000 [02:14<14:14, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 492/3000 [02:15<07:55, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 493/3000 [02:16<11:27, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 494/3000 [02:16<12:38, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 497/3000 [02:17<08:47, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 498/3000 [02:17<08:14, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 499/3000 [02:17<09:16, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 500/3000 [02:18<09:18, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 501/3000 [02:18<10:56, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 503/3000 [02:18<10:50, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 505/3000 [02:19<09:03, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 507/3000 [02:20<13:40, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 509/3000 [02:20<09:17, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [02:20<05:14, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 514/3000 [02:21<11:29, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 515/3000 [02:22<14:10, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 516/3000 [02:22<14:20, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 519/3000 [02:23<08:45, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 521/3000 [02:23<08:23, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 522/3000 [02:23<10:50, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 523/3000 [02:24<10:22, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 525/3000 [02:24<09:26, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 527/3000 [02:25<11:09, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 529/3000 [02:25<09:50, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 530/3000 [02:25<10:51, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 532/3000 [02:26<12:09, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 533/3000 [02:27<15:02, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [02:27<13:41, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 536/3000 [02:27<11:14, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 540/3000 [02:28<06:34, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 541/3000 [02:28<06:29, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 544/3000 [02:29<10:50, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 546/3000 [02:29<09:24, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [02:30<07:41, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 549/3000 [02:30<07:54, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 550/3000 [02:30<10:21, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 551/3000 [02:31<10:26, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 554/3000 [02:31<09:19, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 555/3000 [02:31<09:11, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 557/3000 [02:32<11:47, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 558/3000 [02:33<15:44, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 559/3000 [02:33<14:25, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 561/3000 [02:34<11:47, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 564/3000 [02:34<07:27, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 566/3000 [02:34<07:57, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 567/3000 [02:35<10:56, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 569/3000 [02:35<08:18, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 571/3000 [02:35<06:56, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 572/3000 [02:36<09:16, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 573/3000 [02:36<09:34, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 574/3000 [02:36<09:45, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 575/3000 [02:37<16:20, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 576/3000 [02:38<18:12, 2.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 577/3000 [02:38<16:04, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 578/3000 [02:38<14:11, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 579/3000 [02:39<13:26, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [02:39<14:19, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 583/3000 [02:39<08:06, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 586/3000 [02:40<06:31, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 587/3000 [02:40<08:08, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 588/3000 [02:41<10:44, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 589/3000 [02:41<11:17, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 591/3000 [02:41<09:32, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 592/3000 [02:42<09:21, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 593/3000 [02:42<10:57, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 594/3000 [02:43<15:45, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 596/3000 [02:43<10:33, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 597/3000 [02:43<12:29, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 599/3000 [02:44<10:35, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 600/3000 [02:44<12:54, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 602/3000 [02:45<12:15, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 603/3000 [02:45<11:31, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 604/3000 [02:45<11:16, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [02:46<13:31, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 608/3000 [02:46<09:15, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 610/3000 [02:46<07:52, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 611/3000 [02:47<08:03, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 612/3000 [02:47<08:57, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 614/3000 [02:48<10:25, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 615/3000 [02:48<13:42, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 617/3000 [02:49<11:36, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 618/3000 [02:49<13:56, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 621/3000 [02:50<09:26, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 623/3000 [02:50<08:48, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 624/3000 [02:51<15:47, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 626/3000 [02:51<10:36, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [02:51<10:01, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 628/3000 [02:52<10:14, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 630/3000 [02:52<11:45, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 631/3000 [02:53<17:17, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 634/3000 [02:54<09:57, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 635/3000 [02:54<09:50, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 636/3000 [02:55<15:05, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 639/3000 [02:55<10:05, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 641/3000 [02:56<08:56, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 642/3000 [02:56<11:16, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 643/3000 [02:57<14:14, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 644/3000 [02:57<13:22, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 645/3000 [02:57<13:18, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 649/3000 [02:58<07:38, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 651/3000 [02:59<13:29, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 652/3000 [03:00<14:23, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 653/3000 [03:00<12:57, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [03:01<15:45, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 658/3000 [03:01<10:59, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 659/3000 [03:02<11:43, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 662/3000 [03:02<08:51, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 664/3000 [03:03<08:27, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 665/3000 [03:03<10:18, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 667/3000 [03:04<10:12, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 668/3000 [03:04<10:48, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 671/3000 [03:04<06:58, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 672/3000 [03:05<11:27, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 673/3000 [03:05<12:09, 3.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▎ | 675/3000 [03:06<11:26, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 677/3000 [03:06<09:24, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 679/3000 [03:07<11:03, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 680/3000 [03:07<10:41, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 681/3000 [03:07<10:22, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 682/3000 [03:08<11:33, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 684/3000 [03:09<12:46, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 685/3000 [03:09<15:06, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [03:10<16:06, 2.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 688/3000 [03:10<12:44, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 689/3000 [03:10<11:38, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 690/3000 [03:11<10:47, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 691/3000 [03:11<11:50, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 693/3000 [03:11<09:39, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 694/3000 [03:12<08:24, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 696/3000 [03:12<11:05, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 698/3000 [03:13<07:55, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 700/3000 [03:13<07:27, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 702/3000 [03:13<07:32, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 703/3000 [03:14<07:16, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [03:14<08:37, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 706/3000 [03:15<12:24, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 707/3000 [03:15<12:02, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 708/3000 [03:15<13:17, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 709/3000 [03:16<14:57, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 711/3000 [03:16<10:01, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 713/3000 [03:17<10:05, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 714/3000 [03:17<13:07, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 715/3000 [03:18<13:55, 2.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 717/3000 [03:18<09:33, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 721/3000 [03:18<06:32, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 723/3000 [03:19<05:38, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 726/3000 [03:20<10:46, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 727/3000 [03:20<09:26, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 728/3000 [03:21<10:48, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 729/3000 [03:21<11:41, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 731/3000 [03:22<10:21, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [03:22<10:15, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 733/3000 [03:22<11:01, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 734/3000 [03:23<12:57, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 736/3000 [03:23<10:26, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 737/3000 [03:23<09:52, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 738/3000 [03:24<15:10, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 739/3000 [03:25<16:58, 2.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 740/3000 [03:25<16:22, 2.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 741/3000 [03:26<16:24, 2.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 743/3000 [03:26<10:56, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 744/3000 [03:26<12:42, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 746/3000 [03:27<10:12, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 747/3000 [03:27<09:13, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 749/3000 [03:27<07:20, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 750/3000 [03:28<11:33, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 753/3000 [03:28<09:25, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 755/3000 [03:29<07:23, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 756/3000 [03:29<09:38, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 757/3000 [03:29<10:46, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 758/3000 [03:30<11:58, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 760/3000 [03:31<14:07, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 762/3000 [03:32<12:41, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 763/3000 [03:32<10:53, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 766/3000 [03:32<09:34, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 767/3000 [03:33<08:20, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 768/3000 [03:33<12:39, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 769/3000 [03:33<11:25, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 771/3000 [03:34<09:44, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 774/3000 [03:35<08:59, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 775/3000 [03:35<08:19, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 776/3000 [03:35<12:34, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 777/3000 [03:36<11:29, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 779/3000 [03:36<10:03, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 780/3000 [03:36<09:57, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 781/3000 [03:37<09:27, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 784/3000 [03:37<09:12, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 785/3000 [03:38<12:00, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 787/3000 [03:39<11:19, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 788/3000 [03:39<09:45, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 790/3000 [03:39<08:26, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 791/3000 [03:39<07:54, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [03:40<10:33, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 795/3000 [03:40<08:16, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 797/3000 [03:41<07:07, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 798/3000 [03:41<07:20, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 799/3000 [03:42<13:38, 2.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [03:42<13:48, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 802/3000 [03:43<11:33, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [03:44<18:10, 2.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 804/3000 [03:44<16:34, 2.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 806/3000 [03:45<14:10, 2.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 809/3000 [03:45<09:00, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 810/3000 [03:45<07:50, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 812/3000 [03:46<08:12, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 815/3000 [03:46<06:57, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 817/3000 [03:47<06:29, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 818/3000 [03:47<05:50, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 820/3000 [03:47<07:28, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 821/3000 [03:48<08:28, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 822/3000 [03:48<10:10, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 823/3000 [03:49<21:07, 1.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 824/3000 [03:50<18:26, 1.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 826/3000 [03:50<11:49, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 829/3000 [03:50<07:57, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 830/3000 [03:51<09:16, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 831/3000 [03:51<11:12, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 832/3000 [03:52<10:57, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [03:52<13:21, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 835/3000 [03:53<12:17, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 837/3000 [03:53<09:34, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 839/3000 [03:54<10:44, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 841/3000 [03:55<14:19, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 844/3000 [03:55<09:57, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 846/3000 [03:56<09:06, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 847/3000 [03:56<11:20, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 850/3000 [03:57<07:14, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 851/3000 [03:57<07:15, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 852/3000 [03:58<15:38, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 855/3000 [03:59<10:00, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 856/3000 [04:00<18:28, 1.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 857/3000 [04:00<17:26, 2.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 863/3000 [04:01<07:07, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 866/3000 [04:03<12:47, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 868/3000 [04:03<09:29, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 869/3000 [04:03<10:04, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 871/3000 [04:04<09:20, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 873/3000 [04:05<13:13, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 874/3000 [04:05<10:55, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 876/3000 [04:05<08:11, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 878/3000 [04:06<08:26, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 879/3000 [04:06<07:16, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 881/3000 [04:07<11:16, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [04:07<10:46, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 883/3000 [04:08<13:36, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 884/3000 [04:08<12:43, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 885/3000 [04:09<12:23, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 887/3000 [04:10<14:57, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 888/3000 [04:10<16:02, 2.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [04:10<09:33, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 893/3000 [04:11<08:02, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 894/3000 [04:11<10:47, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 895/3000 [04:12<12:45, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 897/3000 [04:13<13:48, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 899/3000 [04:13<11:55, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 901/3000 [04:14<10:51, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 902/3000 [04:14<11:05, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 904/3000 [04:15<10:23, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 905/3000 [04:15<08:51, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 907/3000 [04:15<07:32, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 909/3000 [04:16<06:31, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 910/3000 [04:17<16:09, 2.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 911/3000 [04:18<16:14, 2.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 912/3000 [04:18<16:13, 2.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 913/3000 [04:19<15:19, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 914/3000 [04:19<13:49, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 915/3000 [04:19<12:41, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 917/3000 [04:20<10:37, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 918/3000 [04:20<09:54, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 920/3000 [04:20<07:38, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 923/3000 [04:21<07:12, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 925/3000 [04:21<06:01, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 927/3000 [04:21<06:08, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 928/3000 [04:21<05:47, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 929/3000 [04:22<08:41, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 930/3000 [04:22<09:15, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 931/3000 [04:22<08:58, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 933/3000 [04:23<09:42, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 934/3000 [04:24<14:36, 2.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 936/3000 [04:24<10:48, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 937/3000 [04:24<09:32, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 940/3000 [04:25<06:34, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 942/3000 [04:26<12:36, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 944/3000 [04:27<13:07, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 945/3000 [04:28<14:45, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 949/3000 [04:28<07:38, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 951/3000 [04:29<09:37, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 952/3000 [04:30<11:38, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 954/3000 [04:30<10:04, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 955/3000 [04:31<11:51, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 957/3000 [04:31<10:07, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 958/3000 [04:31<09:31, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 960/3000 [04:32<09:22, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 961/3000 [04:32<11:00, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 962/3000 [04:33<11:06, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 964/3000 [04:33<12:09, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 965/3000 [04:34<16:13, 2.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [04:35<08:25, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 972/3000 [04:35<07:35, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 973/3000 [04:36<08:10, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 974/3000 [04:36<08:24, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 976/3000 [04:36<09:02, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 977/3000 [04:37<12:43, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 978/3000 [04:38<11:50, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 979/3000 [04:38<10:42, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 980/3000 [04:38<11:42, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 982/3000 [04:38<09:02, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 984/3000 [04:39<08:02, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 986/3000 [04:40<09:53, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 987/3000 [04:40<08:55, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 988/3000 [04:40<10:39, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 989/3000 [04:41<12:35, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 992/3000 [04:41<07:40, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 993/3000 [04:42<12:01, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 997/3000 [04:42<06:48, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 999/3000 [04:43<06:14, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1000/3000 [04:44<10:50, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1003/3000 [04:44<09:07, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1007/3000 [04:46<08:12, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1008/3000 [04:46<08:26, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1009/3000 [04:47<11:16, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1010/3000 [04:47<10:51, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1011/3000 [04:47<10:21, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1012/3000 [04:47<10:15, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1014/3000 [04:48<08:38, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1016/3000 [04:48<07:11, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1019/3000 [04:49<08:23, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1021/3000 [04:49<06:33, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1022/3000 [04:50<07:01, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1024/3000 [04:50<08:04, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1026/3000 [04:52<13:11, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1028/3000 [04:52<08:34, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1030/3000 [04:53<10:19, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1031/3000 [04:53<08:49, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1033/3000 [04:53<07:10, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1034/3000 [04:53<07:33, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1036/3000 [04:54<08:03, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1038/3000 [04:55<08:38, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1041/3000 [04:55<05:17, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1042/3000 [04:55<05:46, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1044/3000 [04:56<08:34, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1047/3000 [04:57<10:38, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1050/3000 [04:58<08:41, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1051/3000 [04:58<10:36, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1052/3000 [04:59<11:32, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1054/3000 [04:59<09:58, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1057/3000 [05:00<08:33, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [05:00<06:18, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1060/3000 [05:01<12:23, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1062/3000 [05:02<13:22, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1063/3000 [05:02<12:58, 2.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1065/3000 [05:03<09:22, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1068/3000 [05:03<08:05, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1071/3000 [05:04<07:07, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1072/3000 [05:04<07:19, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1073/3000 [05:05<11:47, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1075/3000 [05:05<09:50, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1077/3000 [05:06<09:20, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1078/3000 [05:06<11:49, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1079/3000 [05:07<10:57, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1080/3000 [05:07<11:25, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1081/3000 [05:08<12:42, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1083/3000 [05:08<10:46, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1084/3000 [05:08<10:23, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1086/3000 [05:09<08:25, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1087/3000 [05:09<10:21, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1089/3000 [05:10<09:17, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1092/3000 [05:10<05:57, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1094/3000 [05:11<06:03, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1096/3000 [05:11<07:23, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1098/3000 [05:12<06:15, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1099/3000 [05:12<07:45, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1101/3000 [05:13<13:04, 2.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1102/3000 [05:14<11:27, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1103/3000 [05:14<14:55, 2.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1105/3000 [05:15<09:45, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1107/3000 [05:15<07:51, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1108/3000 [05:15<06:38, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1109/3000 [05:15<06:56, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1110/3000 [05:16<08:08, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1112/3000 [05:16<07:31, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [05:17<09:44, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1114/3000 [05:18<15:29, 2.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1115/3000 [05:18<14:47, 2.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1118/3000 [05:19<10:10, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1121/3000 [05:19<06:27, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1123/3000 [05:21<12:42, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1125/3000 [05:21<11:12, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1128/3000 [05:22<07:02, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1129/3000 [05:22<06:07, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1131/3000 [05:23<07:16, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1133/3000 [05:23<06:25, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1136/3000 [05:24<07:08, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1137/3000 [05:24<09:47, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1140/3000 [05:26<12:00, 2.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1141/3000 [05:26<12:07, 2.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1143/3000 [05:26<09:48, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1144/3000 [05:27<11:04, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1146/3000 [05:28<10:42, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1147/3000 [05:28<09:09, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1151/3000 [05:28<05:19, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1152/3000 [05:29<06:33, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1154/3000 [05:29<07:10, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1156/3000 [05:30<08:15, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1158/3000 [05:30<06:42, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1160/3000 [05:31<08:52, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1162/3000 [05:31<06:07, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1163/3000 [05:32<10:57, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1164/3000 [05:32<10:36, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1166/3000 [05:33<10:02, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [05:33<08:47, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1169/3000 [05:34<09:26, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [05:34<10:17, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1171/3000 [05:34<09:50, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1172/3000 [05:35<12:05, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1176/3000 [05:36<07:06, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1177/3000 [05:36<06:49, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1178/3000 [05:37<10:58, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1180/3000 [05:37<09:32, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1181/3000 [05:37<07:45, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1184/3000 [05:38<06:04, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1185/3000 [05:38<06:59, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1187/3000 [05:38<05:55, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1189/3000 [05:40<10:17, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1191/3000 [05:40<10:47, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1193/3000 [05:41<07:25, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1196/3000 [05:41<06:34, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1197/3000 [05:42<07:36, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1198/3000 [05:42<07:52, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1199/3000 [05:43<10:50, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1202/3000 [05:43<06:39, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1204/3000 [05:43<07:14, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [05:44<06:50, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1206/3000 [05:44<10:02, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1208/3000 [05:45<07:36, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1209/3000 [05:45<11:22, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1211/3000 [05:46<08:19, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1213/3000 [05:46<07:21, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1215/3000 [05:47<07:01, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1216/3000 [05:47<09:22, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1217/3000 [05:48<10:44, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1220/3000 [05:48<08:04, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1221/3000 [05:48<07:35, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [05:49<08:42, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1223/3000 [05:49<08:29, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1224/3000 [05:50<13:57, 2.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1225/3000 [05:50<11:46, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1226/3000 [05:50<10:07, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1228/3000 [05:51<09:47, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1230/3000 [05:52<09:56, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1231/3000 [05:52<09:15, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1232/3000 [05:52<08:32, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1234/3000 [05:53<07:43, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1235/3000 [05:53<08:52, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1237/3000 [05:54<07:28, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1239/3000 [05:54<07:06, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1242/3000 [05:55<05:23, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1243/3000 [05:55<05:46, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1244/3000 [05:56<14:32, 2.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1245/3000 [05:57<13:38, 2.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1248/3000 [05:57<07:39, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1249/3000 [05:57<07:58, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1250/3000 [05:58<09:47, 2.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1252/3000 [05:58<09:03, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1253/3000 [06:00<16:36, 1.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1256/3000 [06:00<09:18, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1257/3000 [06:00<08:00, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1259/3000 [06:00<06:09, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1261/3000 [06:01<06:36, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1262/3000 [06:02<13:52, 2.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1268/3000 [06:03<05:28, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1270/3000 [06:04<07:30, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1271/3000 [06:04<09:33, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1273/3000 [06:05<08:50, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1274/3000 [06:05<07:36, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▎ | 1275/3000 [06:05<07:10, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1277/3000 [06:06<09:50, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1279/3000 [06:07<11:16, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1282/3000 [06:08<08:34, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1284/3000 [06:08<08:19, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1288/3000 [06:09<06:35, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1289/3000 [06:10<08:21, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1292/3000 [06:10<06:26, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1295/3000 [06:11<05:08, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1297/3000 [06:11<04:04, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1299/3000 [06:13<12:21, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1301/3000 [06:13<09:15, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1302/3000 [06:14<09:57, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1303/3000 [06:14<09:41, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1304/3000 [06:14<10:54, 2.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1306/3000 [06:15<08:08, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1307/3000 [06:15<07:44, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1308/3000 [06:15<07:30, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1311/3000 [06:16<05:15, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1312/3000 [06:16<06:31, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1314/3000 [06:17<07:11, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1316/3000 [06:17<06:41, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1317/3000 [06:18<10:52, 2.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1320/3000 [06:19<06:59, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1322/3000 [06:19<06:01, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1323/3000 [06:20<10:53, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1325/3000 [06:20<09:09, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1326/3000 [06:21<09:33, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1328/3000 [06:21<08:55, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1330/3000 [06:22<09:14, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1332/3000 [06:22<07:24, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1335/3000 [06:23<05:08, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1336/3000 [06:23<06:54, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1337/3000 [06:24<08:25, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1338/3000 [06:24<07:40, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1340/3000 [06:25<09:57, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1341/3000 [06:25<11:03, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1342/3000 [06:27<15:23, 1.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1348/3000 [06:28<07:31, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1351/3000 [06:28<05:50, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1352/3000 [06:29<07:35, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1354/3000 [06:29<07:48, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1356/3000 [06:30<05:53, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1357/3000 [06:30<08:09, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1359/3000 [06:31<09:58, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [06:32<08:54, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1363/3000 [06:32<07:19, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1365/3000 [06:32<05:28, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1366/3000 [06:33<07:42, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1369/3000 [06:33<05:47, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1372/3000 [06:34<04:43, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1373/3000 [06:34<05:42, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1374/3000 [06:35<08:53, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1375/3000 [06:36<11:28, 2.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1376/3000 [06:37<16:57, 1.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1379/3000 [06:37<09:11, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1381/3000 [06:37<07:32, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1382/3000 [06:38<08:29, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1384/3000 [06:38<06:41, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1388/3000 [06:39<05:00, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1389/3000 [06:39<07:46, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [06:40<07:58, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1392/3000 [06:40<07:11, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1394/3000 [06:40<05:27, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1395/3000 [06:41<08:17, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1396/3000 [06:42<09:40, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1397/3000 [06:42<10:06, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1399/3000 [06:43<08:54, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1400/3000 [06:44<14:14, 1.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1402/3000 [06:44<09:38, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [06:44<08:17, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1404/3000 [06:45<08:54, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1405/3000 [06:45<08:07, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1406/3000 [06:45<08:16, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1408/3000 [06:46<06:46, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1409/3000 [06:46<08:22, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1412/3000 [06:47<06:27, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1414/3000 [06:47<06:28, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1417/3000 [06:48<05:48, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1418/3000 [06:48<06:31, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1419/3000 [06:49<07:48, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1420/3000 [06:49<07:26, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1421/3000 [06:49<08:24, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1423/3000 [06:51<11:20, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1425/3000 [06:51<09:49, 2.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1427/3000 [06:52<07:51, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1428/3000 [06:52<07:27, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1430/3000 [06:52<06:54, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1434/3000 [06:53<04:37, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1436/3000 [06:54<06:20, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1437/3000 [06:54<06:09, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1438/3000 [06:54<06:21, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1440/3000 [06:55<05:17, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1441/3000 [06:55<08:32, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1442/3000 [06:56<09:36, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1443/3000 [06:56<09:23, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1444/3000 [06:56<09:02, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1446/3000 [06:57<07:02, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1449/3000 [06:57<06:01, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1450/3000 [06:58<06:07, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1451/3000 [06:58<06:59, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1454/3000 [06:59<08:15, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1455/3000 [06:59<07:14, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [07:00<08:15, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1458/3000 [07:00<07:35, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1459/3000 [07:01<10:06, 2.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1460/3000 [07:01<09:39, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1461/3000 [07:02<09:30, 2.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1463/3000 [07:03<10:29, 2.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1465/3000 [07:03<08:07, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1467/3000 [07:03<06:25, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1469/3000 [07:04<05:10, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1470/3000 [07:04<05:35, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1472/3000 [07:04<05:08, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1473/3000 [07:05<04:55, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1475/3000 [07:05<04:03, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1476/3000 [07:06<10:02, 2.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1478/3000 [07:06<08:07, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1479/3000 [07:07<07:55, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1480/3000 [07:08<14:41, 1.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1482/3000 [07:09<10:54, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1485/3000 [07:09<06:21, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1487/3000 [07:09<06:32, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1489/3000 [07:10<05:44, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1491/3000 [07:10<06:39, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1492/3000 [07:11<06:24, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1493/3000 [07:11<06:59, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1494/3000 [07:11<07:39, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1496/3000 [07:12<09:27, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1498/3000 [07:13<06:50, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1501/3000 [07:13<04:53, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1502/3000 [07:14<06:40, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1503/3000 [07:14<07:35, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1506/3000 [07:15<05:10, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1507/3000 [07:16<09:08, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1508/3000 [07:16<10:39, 2.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1510/3000 [07:17<10:19, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1511/3000 [07:17<09:39, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1512/3000 [07:18<09:23, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1516/3000 [07:18<05:32, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1519/3000 [07:18<04:02, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1522/3000 [07:19<05:10, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1523/3000 [07:20<07:53, 3.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1525/3000 [07:21<08:01, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1526/3000 [07:21<08:46, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1528/3000 [07:22<08:45, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1533/3000 [07:23<05:08, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1535/3000 [07:23<05:16, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1536/3000 [07:24<06:14, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1537/3000 [07:24<06:21, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1539/3000 [07:24<05:58, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1540/3000 [07:25<05:49, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1541/3000 [07:25<07:46, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1543/3000 [07:27<12:27, 1.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1544/3000 [07:27<10:46, 2.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1546/3000 [07:27<08:17, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1548/3000 [07:28<06:46, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1549/3000 [07:28<06:27, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1551/3000 [07:28<05:59, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1554/3000 [07:29<03:55, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1555/3000 [07:29<04:06, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1557/3000 [07:30<06:00, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1558/3000 [07:30<06:31, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1559/3000 [07:30<06:17, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1560/3000 [07:31<07:23, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1562/3000 [07:31<05:18, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1563/3000 [07:32<09:18, 2.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1565/3000 [07:33<10:04, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1566/3000 [07:33<10:01, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1569/3000 [07:34<06:24, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1572/3000 [07:34<04:18, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1573/3000 [07:35<07:02, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1577/3000 [07:35<04:45, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1578/3000 [07:36<07:26, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1580/3000 [07:37<06:12, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1581/3000 [07:37<06:36, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1582/3000 [07:37<07:08, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1584/3000 [07:38<07:28, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1585/3000 [07:38<07:45, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1587/3000 [07:39<06:08, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1589/3000 [07:39<05:46, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1590/3000 [07:39<05:57, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1593/3000 [07:40<04:45, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1595/3000 [07:40<04:51, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1597/3000 [07:41<06:45, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [07:42<04:20, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [07:42<05:15, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1603/3000 [07:42<05:28, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1604/3000 [07:44<11:04, 2.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1605/3000 [07:44<11:08, 2.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1608/3000 [07:45<07:53, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1611/3000 [07:45<05:25, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1613/3000 [07:45<04:18, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1616/3000 [07:46<03:10, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1617/3000 [07:46<03:20, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1618/3000 [07:47<06:30, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [07:47<06:25, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1621/3000 [07:47<05:00, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1623/3000 [07:48<07:58, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1624/3000 [07:49<11:12, 2.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1625/3000 [07:50<12:12, 1.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1627/3000 [07:50<09:04, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1630/3000 [07:51<04:39, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1632/3000 [07:51<04:16, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1634/3000 [07:52<05:09, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1636/3000 [07:52<04:14, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1637/3000 [07:52<05:35, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1639/3000 [07:52<04:31, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1640/3000 [07:53<06:24, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1642/3000 [07:53<05:13, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1643/3000 [07:54<05:13, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1644/3000 [07:55<10:20, 2.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1645/3000 [07:55<09:01, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1648/3000 [07:55<05:50, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1649/3000 [07:56<06:08, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1650/3000 [07:56<06:53, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1652/3000 [07:57<06:36, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1653/3000 [07:57<07:40, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1657/3000 [07:58<05:03, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1658/3000 [07:58<04:49, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1659/3000 [07:59<09:12, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1660/3000 [08:00<09:07, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1663/3000 [08:00<05:23, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1664/3000 [08:00<06:26, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1666/3000 [08:01<06:06, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1669/3000 [08:01<04:17, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1671/3000 [08:01<03:43, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1672/3000 [08:02<06:16, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1673/3000 [08:03<06:51, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1675/3000 [08:03<05:41, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1677/3000 [08:04<06:38, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [08:05<08:27, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1682/3000 [08:05<06:01, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1684/3000 [08:06<07:02, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1685/3000 [08:07<07:14, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1686/3000 [08:07<08:34, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1688/3000 [08:08<06:39, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1690/3000 [08:08<05:19, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1693/3000 [08:08<03:59, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1694/3000 [08:09<05:04, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1696/3000 [08:10<06:16, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1698/3000 [08:10<05:11, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1701/3000 [08:11<05:44, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1702/3000 [08:12<08:27, 2.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [08:12<07:49, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1707/3000 [08:13<04:48, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1708/3000 [08:13<05:22, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1710/3000 [08:14<05:22, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1711/3000 [08:14<07:04, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1714/3000 [08:15<04:50, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1716/3000 [08:15<05:36, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1718/3000 [08:16<05:25, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1719/3000 [08:16<06:23, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1720/3000 [08:16<06:33, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1722/3000 [08:17<05:52, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1723/3000 [08:17<06:52, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1724/3000 [08:18<06:51, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▊ | 1725/3000 [08:18<07:29, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1727/3000 [08:18<05:26, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1729/3000 [08:19<06:25, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1730/3000 [08:20<07:08, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1731/3000 [08:20<07:30, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1733/3000 [08:21<06:23, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1734/3000 [08:21<08:40, 2.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1737/3000 [08:22<06:06, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1739/3000 [08:23<06:41, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [08:24<07:09, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1742/3000 [08:24<07:07, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1743/3000 [08:24<07:34, 2.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1745/3000 [08:25<06:09, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1748/3000 [08:25<04:13, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1749/3000 [08:25<04:12, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1750/3000 [08:26<04:31, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1752/3000 [08:27<07:05, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1753/3000 [08:27<08:25, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1755/3000 [08:28<07:23, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1757/3000 [08:28<05:41, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1758/3000 [08:29<06:03, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1759/3000 [08:29<05:44, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1761/3000 [08:29<04:28, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1762/3000 [08:30<09:29, 2.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [08:31<05:51, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1766/3000 [08:31<06:23, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1768/3000 [08:32<05:59, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1769/3000 [08:32<05:40, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1770/3000 [08:33<09:02, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1771/3000 [08:33<08:29, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1773/3000 [08:34<06:17, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1775/3000 [08:34<06:02, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1777/3000 [08:35<05:40, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1779/3000 [08:35<05:06, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1780/3000 [08:36<05:47, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1782/3000 [08:36<04:57, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1783/3000 [08:36<05:46, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1784/3000 [08:37<09:25, 2.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1787/3000 [08:38<04:49, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1788/3000 [08:38<06:24, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1789/3000 [08:39<07:37, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1791/3000 [08:39<06:47, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1792/3000 [08:40<07:20, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1793/3000 [08:40<08:44, 2.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1797/3000 [08:41<05:19, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1799/3000 [08:42<05:10, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1800/3000 [08:42<05:33, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1802/3000 [08:43<06:36, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1805/3000 [08:43<05:02, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1807/3000 [08:44<04:50, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1809/3000 [08:45<05:28, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1810/3000 [08:45<06:30, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1812/3000 [08:46<06:42, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [08:47<10:21, 1.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1814/3000 [08:47<08:45, 2.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1817/3000 [08:48<06:42, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1818/3000 [08:48<06:03, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1820/3000 [08:49<04:43, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1821/3000 [08:49<04:33, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1824/3000 [08:49<03:42, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1826/3000 [08:50<04:57, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1828/3000 [08:50<04:20, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1829/3000 [08:51<07:38, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [08:52<07:19, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1831/3000 [08:52<07:56, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1833/3000 [08:53<06:41, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1835/3000 [08:53<04:47, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1836/3000 [08:53<04:07, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1837/3000 [08:53<04:40, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1839/3000 [08:55<06:56, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [08:55<04:47, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1843/3000 [08:55<04:16, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1844/3000 [08:57<09:41, 1.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1846/3000 [08:57<07:03, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1848/3000 [08:57<05:24, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1849/3000 [08:58<07:50, 2.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1850/3000 [08:59<08:31, 2.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1852/3000 [08:59<07:43, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1853/3000 [09:00<07:03, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [09:00<03:50, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1857/3000 [09:01<05:39, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1860/3000 [09:01<04:15, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1861/3000 [09:02<05:39, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1862/3000 [09:02<07:44, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1863/3000 [09:03<09:24, 2.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1864/3000 [09:04<10:39, 1.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1865/3000 [09:04<09:56, 1.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1867/3000 [09:05<07:31, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1869/3000 [09:06<07:39, 2.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1872/3000 [09:07<05:52, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1877/3000 [09:07<03:36, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1879/3000 [09:08<03:43, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1880/3000 [09:09<05:36, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1881/3000 [09:09<05:30, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1882/3000 [09:10<07:32, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1883/3000 [09:10<06:40, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1885/3000 [09:10<05:40, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1886/3000 [09:11<05:32, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1888/3000 [09:11<06:11, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1890/3000 [09:12<04:52, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1891/3000 [09:12<04:56, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1892/3000 [09:12<05:34, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1893/3000 [09:13<07:44, 2.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1894/3000 [09:14<11:58, 1.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1896/3000 [09:15<08:52, 2.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1897/3000 [09:15<08:16, 2.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1899/3000 [09:16<06:23, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1901/3000 [09:16<05:19, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1902/3000 [09:17<07:01, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1904/3000 [09:17<05:09, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1905/3000 [09:18<07:51, 2.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1906/3000 [09:19<08:28, 2.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1908/3000 [09:19<06:14, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1910/3000 [09:20<06:07, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1911/3000 [09:20<05:06, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1912/3000 [09:20<04:55, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1913/3000 [09:21<07:13, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [09:22<10:05, 1.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1916/3000 [09:22<06:23, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1918/3000 [09:22<05:09, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1919/3000 [09:23<06:15, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1922/3000 [09:23<04:39, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1923/3000 [09:25<07:58, 2.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1924/3000 [09:25<08:52, 2.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1925/3000 [09:26<09:20, 1.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1928/3000 [09:26<05:52, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1929/3000 [09:27<08:14, 2.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1930/3000 [09:28<08:51, 2.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1932/3000 [09:28<06:16, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1933/3000 [09:28<05:06, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1934/3000 [09:29<05:23, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1936/3000 [09:29<05:23, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1937/3000 [09:30<04:33, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1939/3000 [09:30<03:32, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1940/3000 [09:31<05:25, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1942/3000 [09:31<04:39, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1943/3000 [09:31<05:26, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1944/3000 [09:32<08:08, 2.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [09:33<06:03, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1948/3000 [09:33<04:57, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1949/3000 [09:34<07:00, 2.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1950/3000 [09:34<07:15, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1953/3000 [09:35<05:01, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1956/3000 [09:35<03:24, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1957/3000 [09:36<05:51, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1958/3000 [09:37<08:36, 2.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1960/3000 [09:38<06:13, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1962/3000 [09:38<05:18, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1964/3000 [09:38<03:42, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1965/3000 [09:39<04:14, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1967/3000 [09:40<05:40, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1969/3000 [09:41<08:13, 2.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1972/3000 [09:41<04:00, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1975/3000 [09:42<03:40, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1976/3000 [09:43<05:16, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1977/3000 [09:43<05:56, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1979/3000 [09:44<04:45, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1982/3000 [09:44<03:37, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1983/3000 [09:44<03:16, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1984/3000 [09:45<04:33, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1985/3000 [09:45<04:26, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1987/3000 [09:46<04:56, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1988/3000 [09:46<05:05, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1989/3000 [09:46<06:01, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1990/3000 [09:47<09:05, 1.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1992/3000 [09:48<06:47, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1994/3000 [09:49<05:35, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1996/3000 [09:49<05:37, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1999/3000 [09:49<03:08, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2000/3000 [09:50<04:05, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2003/3000 [09:51<04:29, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2007/3000 [09:51<02:31, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2009/3000 [09:52<03:59, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2010/3000 [09:53<05:11, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2011/3000 [09:53<06:38, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2012/3000 [09:54<07:15, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2013/3000 [09:55<07:27, 2.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2015/3000 [09:55<06:22, 2.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2016/3000 [09:55<05:11, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2019/3000 [09:56<04:04, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2020/3000 [09:56<04:29, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2023/3000 [09:57<03:50, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2026/3000 [09:57<02:42, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2027/3000 [09:58<03:05, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2030/3000 [09:59<04:51, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2033/3000 [10:00<05:01, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2035/3000 [10:01<04:27, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2036/3000 [10:01<04:55, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2037/3000 [10:02<05:59, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2040/3000 [10:02<03:20, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2041/3000 [10:03<05:38, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2044/3000 [10:03<04:34, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2045/3000 [10:05<08:27, 1.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2046/3000 [10:05<07:36, 2.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2052/3000 [10:06<03:20, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2054/3000 [10:06<03:35, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2055/3000 [10:07<03:19, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2056/3000 [10:07<03:46, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2057/3000 [10:07<03:48, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2059/3000 [10:08<03:20, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2060/3000 [10:08<03:44, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2061/3000 [10:08<03:48, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2062/3000 [10:09<05:24, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2065/3000 [10:11<06:50, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2068/3000 [10:11<04:08, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2069/3000 [10:11<04:48, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2070/3000 [10:12<04:25, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2071/3000 [10:12<06:02, 2.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2072/3000 [10:13<05:48, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2075/3000 [10:13<03:57, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2076/3000 [10:13<03:22, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2077/3000 [10:13<03:28, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2079/3000 [10:14<04:19, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2081/3000 [10:15<04:26, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2084/3000 [10:16<05:21, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2085/3000 [10:16<04:34, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2088/3000 [10:17<03:25, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2089/3000 [10:17<03:44, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2091/3000 [10:18<03:53, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2094/3000 [10:19<03:52, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2096/3000 [10:19<03:05, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2097/3000 [10:19<03:51, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2098/3000 [10:20<03:42, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2100/3000 [10:20<03:06, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2101/3000 [10:21<05:25, 2.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2102/3000 [10:21<06:13, 2.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2103/3000 [10:22<06:03, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2106/3000 [10:22<03:51, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2108/3000 [10:23<05:53, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2109/3000 [10:24<05:35, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2111/3000 [10:24<04:54, 3.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2113/3000 [10:25<04:58, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2118/3000 [10:26<03:09, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2119/3000 [10:26<03:30, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2122/3000 [10:26<02:41, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2123/3000 [10:27<03:08, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2124/3000 [10:27<03:17, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2126/3000 [10:28<05:41, 2.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2127/3000 [10:29<06:22, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2130/3000 [10:29<04:02, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2133/3000 [10:30<02:49, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2135/3000 [10:30<02:25, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2136/3000 [10:31<03:18, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2137/3000 [10:31<05:00, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2138/3000 [10:32<04:33, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2140/3000 [10:32<03:54, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2143/3000 [10:33<03:02, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2144/3000 [10:33<02:48, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2145/3000 [10:33<03:58, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2146/3000 [10:34<04:24, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2147/3000 [10:34<05:43, 2.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2148/3000 [10:35<05:01, 2.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2151/3000 [10:35<03:26, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2154/3000 [10:36<03:47, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2157/3000 [10:37<03:34, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2158/3000 [10:37<03:12, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2159/3000 [10:37<03:41, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2160/3000 [10:38<05:06, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2161/3000 [10:38<04:47, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2162/3000 [10:39<05:32, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2163/3000 [10:39<05:11, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2164/3000 [10:39<04:56, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2165/3000 [10:40<04:38, 3.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2167/3000 [10:40<03:22, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2168/3000 [10:40<03:15, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2169/3000 [10:41<04:08, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2172/3000 [10:41<02:50, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2176/3000 [10:41<01:33, 8.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2179/3000 [10:43<04:14, 3.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2181/3000 [10:44<03:44, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2185/3000 [10:44<02:29, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2186/3000 [10:45<03:20, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2188/3000 [10:45<03:04, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2189/3000 [10:46<03:28, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2192/3000 [10:46<03:07, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2193/3000 [10:47<03:34, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2194/3000 [10:47<03:26, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2195/3000 [10:47<03:31, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2196/3000 [10:48<04:28, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2200/3000 [10:48<02:26, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [10:48<02:20, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2202/3000 [10:49<03:11, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2204/3000 [10:49<03:08, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [10:49<02:39, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2207/3000 [10:50<04:45, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2209/3000 [10:51<04:40, 2.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2212/3000 [10:51<02:51, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2214/3000 [10:52<03:42, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2215/3000 [10:53<03:54, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2217/3000 [10:53<03:18, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2218/3000 [10:53<02:48, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2219/3000 [10:53<02:55, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2220/3000 [10:54<03:12, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2222/3000 [10:54<02:54, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [10:54<02:03, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2225/3000 [10:55<02:54, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2226/3000 [10:55<03:57, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2228/3000 [10:56<03:11, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2232/3000 [10:57<03:22, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2233/3000 [10:57<02:57, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2235/3000 [10:58<04:15, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2236/3000 [10:59<05:16, 2.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2237/3000 [10:59<05:34, 2.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2240/3000 [11:00<03:16, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2243/3000 [11:00<02:17, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2244/3000 [11:00<02:38, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2246/3000 [11:01<02:59, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2249/3000 [11:01<03:00, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2250/3000 [11:02<02:53, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2251/3000 [11:02<03:14, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2254/3000 [11:03<04:03, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2256/3000 [11:04<03:39, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2259/3000 [11:04<02:29, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2261/3000 [11:04<01:50, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2263/3000 [11:05<03:12, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2266/3000 [11:06<02:25, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2268/3000 [11:06<02:55, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2270/3000 [11:07<03:02, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2271/3000 [11:08<03:42, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2272/3000 [11:09<07:44, 1.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2276/3000 [11:09<03:17, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2278/3000 [11:10<02:23, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2280/3000 [11:10<02:11, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2283/3000 [11:10<01:55, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2284/3000 [11:11<02:35, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [11:11<02:48, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2286/3000 [11:11<02:44, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2287/3000 [11:12<03:38, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2288/3000 [11:12<03:42, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2290/3000 [11:12<02:43, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2291/3000 [11:13<03:02, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2292/3000 [11:13<03:36, 3.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2294/3000 [11:14<03:43, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [11:15<03:41, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2297/3000 [11:15<04:10, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2302/3000 [11:16<02:27, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2303/3000 [11:16<02:24, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2304/3000 [11:16<02:30, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2305/3000 [11:17<03:05, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2307/3000 [11:17<03:30, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2308/3000 [11:18<03:16, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2310/3000 [11:18<02:37, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2312/3000 [11:19<03:16, 3.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [11:19<03:09, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2315/3000 [11:20<03:17, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2316/3000 [11:20<03:06, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2318/3000 [11:20<03:03, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2320/3000 [11:21<02:21, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2322/3000 [11:21<02:32, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2323/3000 [11:22<03:48, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2326/3000 [11:22<02:22, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2328/3000 [11:23<02:23, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2329/3000 [11:23<02:16, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2330/3000 [11:23<02:19, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [11:23<02:01, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2333/3000 [11:24<02:25, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2334/3000 [11:25<04:51, 2.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2336/3000 [11:25<03:52, 2.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2338/3000 [11:25<02:54, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2339/3000 [11:26<04:32, 2.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2342/3000 [11:27<02:58, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2343/3000 [11:27<03:03, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2344/3000 [11:27<03:07, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2347/3000 [11:28<01:58, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2348/3000 [11:28<02:04, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2350/3000 [11:28<01:40, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2351/3000 [11:28<02:01, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2353/3000 [11:29<02:21, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2356/3000 [11:30<02:28, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2357/3000 [11:30<02:21, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2359/3000 [11:30<02:14, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2360/3000 [11:31<02:28, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2361/3000 [11:31<03:06, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2363/3000 [11:31<02:36, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2365/3000 [11:32<01:56, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2366/3000 [11:32<01:47, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2367/3000 [11:32<02:58, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2368/3000 [11:33<02:50, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2370/3000 [11:33<02:16, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2371/3000 [11:33<02:39, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2374/3000 [11:34<03:09, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2375/3000 [11:35<03:19, 3.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2379/3000 [11:35<01:45, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2381/3000 [11:36<01:40, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [11:36<03:05, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2384/3000 [11:37<03:17, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2385/3000 [11:37<03:25, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2387/3000 [11:38<03:13, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2391/3000 [11:39<02:11, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2395/3000 [11:39<01:47, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2396/3000 [11:40<02:08, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2400/3000 [11:40<01:45, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2401/3000 [11:41<03:13, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2402/3000 [11:42<03:34, 2.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2403/3000 [11:42<03:24, 2.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2404/3000 [11:42<03:20, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2407/3000 [11:43<02:22, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [11:43<02:08, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2410/3000 [11:43<01:55, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2412/3000 [11:44<01:37, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2416/3000 [11:44<01:33, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2417/3000 [11:45<01:58, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [11:45<02:28, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2420/3000 [11:46<03:03, 3.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2421/3000 [11:46<02:49, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [11:46<02:39, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2425/3000 [11:47<02:12, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2427/3000 [11:47<02:09, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2429/3000 [11:48<02:14, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2430/3000 [11:49<03:12, 2.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2432/3000 [11:49<02:29, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2433/3000 [11:49<02:28, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2434/3000 [11:49<02:32, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2436/3000 [11:50<01:57, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2440/3000 [11:50<01:43, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2441/3000 [11:50<01:41, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2442/3000 [11:51<03:03, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2446/3000 [11:52<02:21, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2447/3000 [11:52<02:04, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2449/3000 [11:53<01:43, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2451/3000 [11:53<02:14, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2453/3000 [11:54<02:18, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2454/3000 [11:55<04:25, 2.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2455/3000 [11:56<04:11, 2.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2456/3000 [11:56<04:15, 2.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2459/3000 [11:57<02:28, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2461/3000 [11:57<01:46, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2464/3000 [11:57<01:21, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2467/3000 [11:58<01:36, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2469/3000 [11:59<02:05, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2470/3000 [11:59<02:10, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2471/3000 [12:00<03:36, 2.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2472/3000 [12:00<03:08, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [12:01<03:51, 2.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2477/3000 [12:01<01:52, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2479/3000 [12:02<02:32, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2480/3000 [12:02<03:03, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2482/3000 [12:03<02:18, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2483/3000 [12:03<02:48, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2485/3000 [12:04<02:31, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2486/3000 [12:04<02:07, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [12:04<01:49, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2489/3000 [12:04<02:03, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2490/3000 [12:05<02:04, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2493/3000 [12:05<01:35, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2495/3000 [12:05<01:28, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2496/3000 [12:06<02:37, 3.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2498/3000 [12:07<02:41, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2499/3000 [12:07<02:38, 3.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2500/3000 [12:07<02:24, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2502/3000 [12:08<02:10, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2504/3000 [12:09<02:41, 3.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2506/3000 [12:09<01:41, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2507/3000 [12:09<01:38, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2511/3000 [12:10<01:26, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2513/3000 [12:10<01:32, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2515/3000 [12:11<01:51, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [12:11<02:05, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2518/3000 [12:12<02:07, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2521/3000 [12:12<01:41, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2522/3000 [12:13<02:14, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2524/3000 [12:13<02:29, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [12:14<02:37, 3.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2528/3000 [12:14<01:57, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2531/3000 [12:15<01:16, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2533/3000 [12:16<02:10, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2536/3000 [12:16<01:38, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2537/3000 [12:16<01:29, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2538/3000 [12:17<01:51, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2539/3000 [12:17<01:48, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2542/3000 [12:18<01:42, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2543/3000 [12:18<02:48, 2.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2544/3000 [12:19<03:23, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2547/3000 [12:20<02:25, 3.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2551/3000 [12:20<01:49, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2556/3000 [12:21<01:24, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2559/3000 [12:22<01:19, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2561/3000 [12:22<01:12, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2562/3000 [12:23<02:15, 3.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2564/3000 [12:24<02:07, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2567/3000 [12:24<02:00, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2568/3000 [12:25<02:09, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [12:25<01:37, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2572/3000 [12:26<01:46, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2575/3000 [12:26<01:20, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2577/3000 [12:26<01:15, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [12:27<00:59, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2581/3000 [12:27<01:24, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2583/3000 [12:27<01:02, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2584/3000 [12:28<01:26, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2585/3000 [12:29<02:32, 2.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2586/3000 [12:29<03:08, 2.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2588/3000 [12:30<02:14, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2589/3000 [12:30<02:27, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2592/3000 [12:30<01:36, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2596/3000 [12:31<01:11, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2597/3000 [12:31<01:06, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2599/3000 [12:32<01:19, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2604/3000 [12:32<00:57, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2605/3000 [12:32<00:58, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2606/3000 [12:34<03:08, 2.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2609/3000 [12:35<02:06, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2612/3000 [12:35<01:17, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2615/3000 [12:35<01:05, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2617/3000 [12:36<01:03, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2618/3000 [12:36<01:41, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2619/3000 [12:36<01:39, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2622/3000 [12:37<01:08, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2623/3000 [12:37<01:11, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2625/3000 [12:37<00:57, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2626/3000 [12:37<01:13, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2627/3000 [12:38<01:33, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2628/3000 [12:38<01:38, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2629/3000 [12:39<02:00, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2630/3000 [12:39<01:47, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2633/3000 [12:39<01:17, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2636/3000 [12:40<00:51, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2638/3000 [12:40<01:26, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2641/3000 [12:41<01:29, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2643/3000 [12:42<01:43, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2645/3000 [12:43<01:35, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2646/3000 [12:43<01:38, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2648/3000 [12:43<01:20, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2650/3000 [12:44<01:20, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2651/3000 [12:44<01:26, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2652/3000 [12:44<01:26, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2653/3000 [12:45<01:53, 3.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2655/3000 [12:45<01:23, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2658/3000 [12:46<01:23, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2660/3000 [12:46<01:14, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2664/3000 [12:47<00:57, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2665/3000 [12:47<01:18, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2668/3000 [12:48<01:28, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2670/3000 [12:49<01:35, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2672/3000 [12:49<01:34, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2676/3000 [12:50<01:10, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2678/3000 [12:50<01:11, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2681/3000 [12:52<01:38, 3.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2683/3000 [12:52<01:10, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2684/3000 [12:52<01:03, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2685/3000 [12:52<01:15, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2688/3000 [12:53<00:55, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2689/3000 [12:53<01:09, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2692/3000 [12:54<01:02, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2694/3000 [12:54<00:56, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2695/3000 [12:55<01:26, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [12:55<01:26, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2697/3000 [12:56<02:11, 2.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2698/3000 [12:56<01:58, 2.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2701/3000 [12:56<01:11, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2702/3000 [12:57<01:35, 3.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2705/3000 [12:57<01:04, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2708/3000 [12:58<00:43, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2710/3000 [12:58<00:49, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2712/3000 [12:58<00:50, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2713/3000 [12:59<01:07, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2714/3000 [13:00<01:23, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2715/3000 [13:00<01:48, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2719/3000 [13:01<00:58, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2721/3000 [13:01<00:58, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2724/3000 [13:02<00:53, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2728/3000 [13:02<00:49, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2730/3000 [13:03<00:52, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2732/3000 [13:03<00:56, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2734/3000 [13:04<01:21, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2735/3000 [13:05<01:45, 2.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2738/3000 [13:06<01:14, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2739/3000 [13:06<01:16, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2740/3000 [13:06<01:21, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2741/3000 [13:07<01:15, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [13:07<00:43, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2747/3000 [13:07<00:45, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2751/3000 [13:08<00:36, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2752/3000 [13:08<00:37, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2754/3000 [13:08<00:41, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2755/3000 [13:10<01:51, 2.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2757/3000 [13:11<01:31, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2762/3000 [13:11<00:44, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2764/3000 [13:12<00:43, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2768/3000 [13:12<00:34, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2769/3000 [13:12<00:39, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2771/3000 [13:13<00:39, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2772/3000 [13:13<00:49, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2773/3000 [13:14<00:58, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2774/3000 [13:15<01:36, 2.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2776/3000 [13:15<01:19, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2779/3000 [13:16<00:56, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [13:16<01:04, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [13:17<00:45, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2786/3000 [13:17<00:41, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2787/3000 [13:17<00:45, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2788/3000 [13:18<00:45, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2789/3000 [13:18<00:44, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2791/3000 [13:18<00:38, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2792/3000 [13:19<00:54, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2794/3000 [13:19<00:53, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2796/3000 [13:20<00:48, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2797/3000 [13:20<00:53, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2798/3000 [13:20<00:54, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2802/3000 [13:21<00:32, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2803/3000 [13:21<00:38, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2804/3000 [13:21<00:47, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2808/3000 [13:22<00:41, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2810/3000 [13:23<00:41, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2812/3000 [13:23<00:50, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2813/3000 [13:24<00:45, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2814/3000 [13:24<00:44, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2818/3000 [13:24<00:24, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2820/3000 [13:25<00:32, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [13:25<00:29, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2823/3000 [13:25<00:35, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2824/3000 [13:25<00:31, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2825/3000 [13:26<00:39, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2827/3000 [13:26<00:42, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2830/3000 [13:28<00:57, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2831/3000 [13:28<00:58, 2.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2833/3000 [13:28<00:42, 3.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [13:29<00:47, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2836/3000 [13:29<00:44, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2838/3000 [13:30<00:35, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [13:30<00:34, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2842/3000 [13:30<00:24, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [13:31<00:32, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2846/3000 [13:31<00:34, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2848/3000 [13:32<00:45, 3.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2850/3000 [13:32<00:29, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2852/3000 [13:33<00:29, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2853/3000 [13:33<00:43, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2856/3000 [13:34<00:31, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2858/3000 [13:34<00:28, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2860/3000 [13:34<00:23, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2862/3000 [13:35<00:32, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2863/3000 [13:36<00:43, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2864/3000 [13:36<00:40, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2867/3000 [13:36<00:26, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2868/3000 [13:37<00:26, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2871/3000 [13:37<00:22, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2872/3000 [13:37<00:23, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2873/3000 [13:38<00:35, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2875/3000 [13:39<00:43, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2879/3000 [13:40<00:26, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2883/3000 [13:40<00:22, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2886/3000 [13:41<00:16, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2888/3000 [13:41<00:19, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2889/3000 [13:41<00:22, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2892/3000 [13:42<00:18, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2893/3000 [13:43<00:32, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2894/3000 [13:43<00:36, 2.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2896/3000 [13:44<00:39, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2898/3000 [13:44<00:30, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2899/3000 [13:45<00:38, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2900/3000 [13:45<00:35, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2905/3000 [13:46<00:16, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2906/3000 [13:46<00:16, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2908/3000 [13:47<00:20, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2910/3000 [13:47<00:26, 3.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2911/3000 [13:48<00:25, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2913/3000 [13:48<00:22, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2914/3000 [13:48<00:23, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2915/3000 [13:49<00:27, 3.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2917/3000 [13:49<00:22, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2919/3000 [13:50<00:17, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2920/3000 [13:50<00:22, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2923/3000 [13:50<00:13, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2924/3000 [13:51<00:14, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2925/3000 [13:51<00:14, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2927/3000 [13:51<00:13, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2928/3000 [13:53<00:32, 2.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2932/3000 [13:53<00:15, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2934/3000 [13:54<00:14, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2937/3000 [13:54<00:15, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2940/3000 [13:55<00:11, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2942/3000 [13:55<00:09, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2944/3000 [13:56<00:13, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2946/3000 [13:56<00:09, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2947/3000 [13:57<00:15, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2949/3000 [13:57<00:12, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [13:58<00:09, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [13:59<00:15, 3.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2955/3000 [13:59<00:11, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2957/3000 [13:59<00:12, 3.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2958/3000 [14:00<00:14, 2.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2960/3000 [14:00<00:11, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2962/3000 [14:01<00:09, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2964/3000 [14:01<00:06, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2965/3000 [14:01<00:07, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2967/3000 [14:02<00:07, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2969/3000 [14:02<00:05, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2971/3000 [14:02<00:05, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2972/3000 [14:03<00:05, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2974/3000 [14:03<00:05, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2976/3000 [14:03<00:04, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2978/3000 [14:04<00:05, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2980/3000 [14:05<00:06, 3.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2981/3000 [14:05<00:06, 2.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2982/3000 [14:06<00:06, 2.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2983/3000 [14:06<00:06, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2985/3000 [14:07<00:04, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2989/3000 [14:07<00:01, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2990/3000 [14:07<00:01, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [14:07<00:01, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2992/3000 [14:08<00:02, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2994/3000 [14:08<00:01, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2996/3000 [14:09<00:01, 2.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2998/3000 [14:10<00:00, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2999/3000 [14:10<00:00, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [14:10<00:00, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:11:33.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (before optimization): {'f1': 0.0, 'em': 0.0, 'acc': 0.4703333333333333}\u001b[0m\n",
"\u001b[32m2026-01-10 16:11:33.406\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mOptimizing workflow...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0/20 [00:00, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:11:33.411\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:11:55.421\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:13:16.505\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:13:21.238\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:13:21.239\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 1 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:15, 1.53s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:03<01:14, 1.54s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:03<00:34, 1.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:03<00:27, 1.64it/s]\u001b[A\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:04<00:11, 3.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:09, 4.17it/s]\u001b[A\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:08, 4.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:07, 4.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:05<00:09, 3.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:05<00:05, 6.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:06, 4.97it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:06<00:06, 5.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:06<00:06, 4.75it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:06<00:05, 5.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:06<00:07, 3.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:07<00:08, 3.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:08<00:07, 3.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:08<00:11, 2.08it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:09<00:04, 4.19it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:09<00:03, 5.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:09<00:01, 9.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:10<00:02, 5.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:11<00:02, 3.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:11<00:02, 3.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:12<00:01, 3.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:12<00:00, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:13<00:00, 3.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:14<00:00, 2.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:16<00:00, 3.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:13:37.455\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 1 metrics: {'f1': 0.12, 'em': 0.12, 'acc': 0.56}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 5%|▌ | 1/20 [02:04<39:16, 124.04s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:13:37.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:13:53.837\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:15:14.093\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:15:18.677\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:15:18.678\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 2 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:03<03:07, 3.84s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:04<01:25, 1.78s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:04<00:49, 1.06s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:04<00:35, 1.29it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:04<00:13, 3.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:05<00:12, 3.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:05<00:07, 5.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:05<00:07, 5.30it/s]\u001b[A\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:05<00:05, 6.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:05<00:05, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:06<00:06, 4.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:06<00:06, 4.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:07<00:10, 3.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:07<00:09, 3.18it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:07<00:07, 3.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:08<00:09, 2.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:08<00:08, 3.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:08<00:09, 2.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:09<00:08, 2.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:09<00:07, 3.05it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:09<00:06, 3.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:10<00:05, 3.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:10<00:08, 2.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:10<00:06, 2.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:11<00:05, 3.52it/s]\u001b[A\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:11<00:02, 6.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:11<00:02, 5.89it/s]\u001b[A\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:11<00:01, 6.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:12<00:03, 3.61it/s]\u001b[A\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:12<00:01, 4.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:13<00:02, 2.70it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:13<00:01, 3.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:14<00:00, 4.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:15<00:01, 2.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:16<00:00, 2.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:16<00:00, 2.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:15:35.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 2 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.56}\u001b[0m\n",
"\u001b[32m2026-01-10 16:15:35.760\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.12, 'em': 0.12, 'acc': 0.56}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 10%|█ | 2/20 [04:02<36:12, 120.67s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:15:35.764\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:15:50.561\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:17:04.909\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:17:10.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:17:10.974\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 3 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:03, 1.30s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:02<01:05, 1.36s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:03<00:44, 1.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:04<00:41, 1.10it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:04<00:16, 2.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:04<00:13, 3.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:08, 4.80it/s]\u001b[A\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:07, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:05<00:08, 4.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:05<00:05, 6.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:04, 6.62it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:05<00:04, 6.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:06<00:04, 6.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:07<00:09, 3.00it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:07<00:08, 3.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:07<00:06, 4.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:08<00:08, 2.98it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:08<00:07, 2.93it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:08<00:06, 3.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:08<00:03, 5.13it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:09<00:03, 5.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:09<00:02, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:09<00:02, 5.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:09<00:03, 5.28it/s]\u001b[A\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:09<00:02, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:09<00:02, 6.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:10<00:03, 3.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:11<00:04, 2.67it/s]\u001b[A\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:11<00:02, 4.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:11<00:02, 3.21it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:11<00:02, 3.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:11<00:01, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:12<00:01, 3.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:12<00:01, 3.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:13<00:01, 2.69it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:13<00:00, 3.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:13<00:00, 2.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:14<00:00, 1.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:15<00:00, 3.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:17:26.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 3 metrics: {'f1': 0.12, 'em': 0.12, 'acc': 0.54}\u001b[0m\n",
"\u001b[32m2026-01-10 16:17:26.129\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.12, 'em': 0.12, 'acc': 0.56}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 15%|█▌ | 3/20 [05:52<32:51, 115.97s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:17:26.132\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:17:40.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:19:08.385\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:19:14.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:19:14.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 4 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:36, 1.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:29, 1.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:23, 1.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:02<00:29, 1.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:24, 1.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:03<00:15, 2.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:12, 3.11it/s]\u001b[A\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:10, 3.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:09, 3.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:04<00:09, 3.74it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:05, 6.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:06, 5.27it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:05<00:03, 7.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:05<00:03, 8.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:06<00:03, 7.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:06<00:02, 8.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:07<00:04, 4.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:07<00:03, 5.05it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:07<00:02, 6.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:08<00:04, 3.90it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:08<00:04, 3.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:08<00:03, 3.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:09<00:03, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:09<00:02, 4.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:10<00:01, 4.70it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:10<00:00, 6.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:10<00:00, 4.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:11<00:00, 4.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:11<00:00, 3.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:12<00:00, 2.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:12<00:00, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:19:27.471\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 4 metrics: {'f1': 0.26, 'em': 0.26, 'acc': 0.7}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 20%|██ | 4/20 [07:54<31:29, 118.09s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:19:27.472\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:19:43.732\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:21:12.286\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:21:20.246\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:21:20.247\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 5 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:34, 1.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:33, 1.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:24, 1.95it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:01<00:16, 2.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:15, 2.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:03<00:26, 1.69it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:03<00:19, 2.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:16, 2.57it/s]\u001b[A\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:03<00:12, 3.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:11, 3.34it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:08, 4.56it/s]\u001b[A\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:04<00:05, 6.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:04, 7.44it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:04<00:04, 7.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:04<00:04, 7.14it/s]\u001b[A\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:03, 9.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:03, 9.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:04, 6.10it/s]\u001b[A\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:05<00:03, 7.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:06<00:02, 7.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:06<00:03, 5.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:07<00:05, 3.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:08<00:05, 3.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:08<00:06, 2.64it/s]\u001b[A\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:08<00:04, 3.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:09<00:02, 4.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:09<00:01, 6.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:01, 5.01it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:09<00:01, 6.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:10<00:01, 5.67it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:10<00:00, 5.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:10<00:00, 5.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:10<00:00, 3.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:12<00:01, 1.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:15<00:00, 3.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:21:35.693\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 5 metrics: {'f1': 0.28, 'em': 0.28, 'acc': 0.66}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 25%|██▌ | 5/20 [10:02<30:26, 121.74s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:21:35.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:21:50.402\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:23:06.458\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:23:13.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:23:13.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 6 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:02<01:54, 2.34s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:03<00:41, 1.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:03<00:32, 1.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:03<00:27, 1.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:04<00:22, 1.93it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:04<00:16, 2.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:04<00:12, 3.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:04<00:12, 3.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:05<00:14, 2.80it/s]\u001b[A\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:05<00:06, 5.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:05<00:06, 5.77it/s]\u001b[A\n",
"Evaluating workflow: 30%|███ | 15/50 [00:05<00:05, 6.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:05<00:05, 6.62it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:05<00:04, 6.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:06<00:03, 8.95it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:07<00:08, 3.32it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:07<00:05, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:07<00:05, 4.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:08<00:05, 4.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:08<00:05, 4.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:08<00:05, 4.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:09<00:06, 3.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:09<00:04, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:10<00:03, 4.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:10<00:03, 4.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:10<00:03, 3.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:11<00:02, 4.15it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:11<00:01, 6.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:11<00:01, 6.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:11<00:01, 5.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:12<00:01, 3.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:13<00:01, 3.11it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:13<00:00, 3.57it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:13<00:00, 3.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:14<00:00, 3.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:23:27.846\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 6 metrics: {'f1': 0.26, 'em': 0.26, 'acc': 0.72}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 30%|███ | 6/20 [11:54<27:38, 118.48s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:23:27.847\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:23:44.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:25:11.504\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:25:20.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:25:20.746\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 7 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<00:50, 1.04s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:41, 1.17it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:24, 1.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:02<00:17, 2.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:02<00:10, 4.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:14, 3.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:18, 2.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:03<00:16, 2.45it/s]\u001b[A\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:13, 3.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:09, 3.94it/s]\u001b[A\n",
"Evaluating workflow: 30%|███ | 15/50 [00:04<00:05, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:04<00:04, 7.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 19/50 [00:05<00:04, 7.00it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:05<00:03, 8.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:02, 9.58it/s]\u001b[A\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:05<00:01, 13.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:05<00:02, 9.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:06<00:02, 8.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:06<00:02, 6.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:07<00:02, 5.33it/s]\u001b[A\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:07<00:02, 5.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:08<00:03, 3.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:08<00:03, 3.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:08<00:03, 3.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:09<00:03, 2.72it/s]\u001b[A\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:02, 3.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:09<00:02, 3.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:10<00:01, 3.43it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:10<00:01, 4.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:10<00:01, 2.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:11<00:00, 2.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:12<00:00, 4.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:25:33.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 7 metrics: {'f1': 0.4, 'em': 0.4, 'acc': 0.76}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 35%|███▌ | 7/20 [13:59<26:08, 120.67s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:25:33.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:25:49.821\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:27:38.924\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:27:49.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:27:49.386\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 8 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:46, 1.05it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:36, 1.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:23, 1.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:16, 2.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:02<00:16, 2.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:03<00:15, 2.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:16, 2.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:03<00:12, 3.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:12, 3.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:04<00:09, 3.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:04<00:09, 3.77it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:06, 5.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:04, 7.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:03, 7.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:03, 7.35it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:03, 8.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:06<00:02, 8.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:06<00:02, 8.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:07<00:03, 4.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:08<00:05, 3.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:08<00:03, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:08<00:03, 3.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:09<00:03, 3.27it/s]\u001b[A\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:09<00:03, 3.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:09<00:02, 4.05it/s]\u001b[A\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:01, 4.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:10<00:01, 4.99it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:10<00:01, 5.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:10<00:01, 5.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:11<00:01, 3.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:11<00:00, 4.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:28:00.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 8 metrics: {'f1': 0.34, 'em': 0.34, 'acc': 0.62}\u001b[0m\n",
"\u001b[32m2026-01-10 16:28:00.921\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.4, 'em': 0.4, 'acc': 0.76}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 40%|████ | 8/20 [16:27<25:52, 129.34s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:28:00.925\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:28:23.153\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:29:45.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:29:52.633\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:29:52.634\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 9 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<00:58, 1.19s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:34, 1.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:01<00:11, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:02<00:18, 2.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:03<00:18, 2.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:03<00:16, 2.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:09, 4.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:04<00:06, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:06, 5.38it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:04<00:06, 5.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:04<00:04, 6.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:04, 6.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:05<00:06, 4.82it/s]\u001b[A\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:05, 5.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:03, 7.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:06<00:03, 7.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:06<00:04, 5.04it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:06<00:03, 5.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:07<00:04, 4.43it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:07<00:03, 4.65it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:07<00:03, 5.28it/s]\u001b[A\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:07<00:02, 6.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:08<00:03, 3.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:09<00:03, 3.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:01, 5.61it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:09<00:01, 5.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:10<00:02, 3.16it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:10<00:01, 4.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:10<00:00, 4.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:10<00:00, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:11<00:00, 3.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:12<00:00, 2.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:12<00:00, 4.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:30:05.179\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 9 metrics: {'f1': 0.38, 'em': 0.38, 'acc': 0.7}\u001b[0m\n",
"\u001b[32m2026-01-10 16:30:05.180\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.4, 'em': 0.4, 'acc': 0.76}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 45%|████▌ | 9/20 [18:31<23:25, 127.75s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:30:05.183\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:30:23.543\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:31:59.312\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:32:07.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:32:07.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 10 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:00, 1.23s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:39, 1.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:02<00:27, 1.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:15, 2.94it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:09, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:02<00:09, 4.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:03<00:17, 2.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:15, 2.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:09, 3.85it/s]\u001b[A\n",
"Evaluating workflow: 30%|███ | 15/50 [00:04<00:05, 6.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:04<00:04, 7.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:04<00:03, 8.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:05, 5.38it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:04, 6.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:06<00:04, 5.09it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:06<00:03, 5.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:07<00:03, 5.63it/s]\u001b[A\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:07<00:02, 7.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:07<00:02, 6.67it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:07<00:02, 5.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:08<00:02, 4.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:09<00:03, 3.14it/s]\u001b[A\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:02, 3.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:09<00:02, 3.14it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:10<00:01, 3.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:10<00:01, 3.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:10<00:01, 2.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:11<00:01, 2.25it/s]\u001b[A\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:11<00:00, 3.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:11<00:00, 4.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:12<00:00, 4.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:32:20.409\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 10 metrics: {'f1': 0.44, 'em': 0.44, 'acc': 0.66}\u001b[0m\n",
"\u001b[32m2026-01-10 16:32:20.410\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.4, 'em': 0.4, 'acc': 0.76}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 50%|█████ | 10/20 [20:47<21:40, 130.06s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:32:20.413\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:32:37.349\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:34:16.118\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:34:23.234\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:34:23.235\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 11 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:48, 1.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:33, 1.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:22, 2.05it/s]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:01<00:10, 4.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:01<00:09, 4.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:02<00:07, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:02<00:07, 5.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:02<00:07, 5.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:03<00:15, 2.58it/s]\u001b[A\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:03<00:09, 3.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:04<00:10, 3.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:07, 4.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:04<00:05, 5.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:04<00:05, 5.87it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:05<00:04, 5.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:05<00:03, 7.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:05<00:03, 6.60it/s]\u001b[A\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:05<00:03, 6.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:05<00:03, 7.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:06<00:01, 10.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:06<00:01, 11.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:06<00:01, 10.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:06<00:01, 8.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:07<00:01, 8.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:08<00:02, 3.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:09<00:01, 3.11it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:09<00:01, 3.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:09<00:01, 3.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:10<00:00, 3.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:10<00:00, 2.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:11<00:00, 2.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:12<00:00, 4.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:34:35.352\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 11 metrics: {'f1': 0.5, 'em': 0.5, 'acc': 0.74}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 55%|█████▌ | 11/20 [23:01<19:43, 131.55s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:34:35.353\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:34:51.786\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:36:30.610\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:36:37.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:36:37.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 12 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:34, 1.92s/it]\u001b[A\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:02<00:41, 1.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:02<00:18, 2.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:03<00:26, 1.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:03<00:23, 1.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:04<00:22, 1.88it/s]\u001b[A\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:04<00:12, 3.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:09, 4.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:05<00:12, 2.98it/s]\u001b[A\n",
"Evaluating workflow: 30%|███ | 15/50 [00:05<00:06, 5.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:05<00:05, 6.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:05<00:04, 6.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:06<00:04, 7.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:06<00:03, 7.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:06<00:03, 7.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:07<00:05, 4.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:07<00:04, 4.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:08<00:04, 4.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:08<00:04, 4.36it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:08<00:03, 4.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:08<00:04, 4.15it/s]\u001b[A\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:09<00:03, 4.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:09<00:04, 3.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:10<00:04, 3.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:10<00:03, 3.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:10<00:03, 2.99it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:11<00:02, 4.06it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:11<00:01, 3.54it/s]\u001b[A\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:11<00:01, 4.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:12<00:00, 5.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:13<00:00, 2.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:15<00:00, 1.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:21<00:00, 2.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:36:59.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 12 metrics: {'f1': 0.38, 'em': 0.38, 'acc': 0.7}\u001b[0m\n",
"\u001b[32m2026-01-10 16:36:59.288\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.5, 'em': 0.5, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 60%|██████ | 12/20 [25:25<18:02, 135.32s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:36:59.291\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:37:19.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:38:50.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:38:57.393\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:38:57.394\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 13 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:02<01:41, 2.07s/it]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 2/50 [00:02<00:46, 1.03it/s]\u001b[A\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:02<00:18, 2.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:03<00:24, 1.77it/s]\u001b[A\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:04<00:15, 2.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:04<00:13, 2.98it/s]\u001b[A\n",
"Evaluating workflow: 20%|██ | 10/50 [00:04<00:11, 3.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:04<00:06, 6.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 15/50 [00:04<00:06, 5.72it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:05<00:06, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:05, 6.26it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:05<00:04, 6.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:04, 7.04it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:05<00:04, 6.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:06<00:06, 4.01it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:06<00:05, 4.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:06<00:05, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:07<00:08, 3.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:07<00:05, 4.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:07<00:05, 4.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:08<00:06, 3.41it/s]\u001b[A\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:08<00:05, 3.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:08<00:05, 3.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:08<00:04, 3.71it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:09<00:03, 4.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:10<00:02, 4.37it/s]\u001b[A\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:10<00:02, 4.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:10<00:02, 5.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:11<00:03, 2.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:12<00:03, 2.38it/s]\u001b[A\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:12<00:02, 2.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:13<00:02, 2.24it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:13<00:02, 1.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:14<00:01, 2.30it/s]\u001b[A\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:14<00:00, 2.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:17<00:00, 2.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:39:14.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 13 metrics: {'f1': 0.36, 'em': 0.36, 'acc': 0.7}\u001b[0m\n",
"\u001b[32m2026-01-10 16:39:14.706\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.5, 'em': 0.5, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 65%|██████▌ | 13/20 [27:41<15:47, 135.35s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:39:14.710\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:39:32.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:41:12.519\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:41:19.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:41:19.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 14 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:00, 1.23s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:31, 1.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:21, 2.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:01<00:08, 4.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:15, 2.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:18, 2.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:04<00:10, 3.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:04<00:09, 3.73it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:07, 4.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:05<00:08, 3.84it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:05<00:05, 5.19it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:05, 5.66it/s]\u001b[A\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:04, 6.84it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:05<00:04, 5.72it/s]\u001b[A\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:06<00:02, 9.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:06<00:02, 10.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:06<00:03, 5.92it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:07<00:03, 4.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:07<00:03, 4.60it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:08<00:02, 4.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:08<00:03, 3.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:09<00:02, 3.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:09<00:02, 3.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:10<00:01, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:10<00:01, 3.28it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:11<00:01, 3.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:11<00:01, 3.04it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:11<00:00, 3.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:47<00:07, 7.70s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:48<00:00, 1.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:42:08.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 14 metrics: {'f1': 0.44, 'em': 0.44, 'acc': 0.68}\u001b[0m\n",
"\u001b[32m2026-01-10 16:42:08.145\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.5, 'em': 0.5, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 70%|███████ | 14/20 [30:34<14:41, 146.86s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:42:08.149\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:42:26.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:43:55.287\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:44:02.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:44:02.621\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 15 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:34, 1.92s/it]\u001b[A\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:02<00:42, 1.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:02<00:25, 1.83it/s]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:13, 3.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:08, 4.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:18, 2.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:04<00:16, 2.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:04<00:13, 2.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:05<00:10, 3.46it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:05<00:07, 4.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:05<00:07, 4.35it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:05<00:06, 4.85it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:05<00:06, 4.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:06<00:03, 7.54it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:06<00:03, 8.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:06<00:04, 5.41it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:07<00:04, 5.08it/s]\u001b[A\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:07<00:02, 7.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:07<00:03, 5.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:08<00:03, 4.66it/s]\u001b[A\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:08<00:02, 5.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:08<00:01, 8.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:09<00:02, 4.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:10<00:02, 3.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:11<00:02, 2.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:11<00:01, 3.27it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:11<00:00, 4.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:12<00:00, 3.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:12<00:00, 2.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:13<00:00, 1.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:15<00:00, 3.26it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:44:18.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 15 metrics: {'f1': 0.5, 'em': 0.5, 'acc': 0.82}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 75%|███████▌ | 15/20 [32:44<11:48, 141.75s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:44:18.066\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:44:36.100\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:45:58.823\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:46:05.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:46:05.558\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 16 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:40, 1.21it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:40, 1.19it/s]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:01<00:12, 3.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:01<00:05, 7.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:02<00:07, 5.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:03<00:07, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:03<00:06, 5.39it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:03<00:06, 5.49it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:03<00:03, 8.15it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:04<00:04, 6.57it/s]\u001b[A\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:04<00:02, 8.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:04<00:03, 7.51it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:05<00:03, 5.55it/s]\u001b[A\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:05<00:02, 8.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:05<00:01, 8.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:05<00:01, 9.76it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:05<00:00, 11.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:06<00:01, 6.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:06<00:01, 6.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:07<00:00, 6.94it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:07<00:00, 8.42it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:10<00:00, 1.78it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:10<00:00, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:46:16.652\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 16 metrics: {'f1': 0.62, 'em': 0.62, 'acc': 0.74}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 80%|████████ | 16/20 [34:43<08:59, 134.78s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:46:16.653\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:46:28.576\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:47:27.554\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:47:34.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:47:34.218\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 17 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:22, 1.68s/it]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:01<00:12, 3.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:03<00:15, 2.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:03<00:11, 3.35it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:04<00:09, 3.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:04<00:07, 4.64it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:08, 3.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:05<00:05, 5.87it/s]\u001b[A\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:05<00:03, 7.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:05<00:02, 8.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:06<00:03, 6.27it/s]\u001b[A\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:06<00:02, 7.30it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:07<00:02, 6.77it/s]\u001b[A\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:07<00:01, 8.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:08<00:01, 5.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:08<00:02, 4.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:08<00:01, 4.51it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:09<00:01, 4.86it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:09<00:01, 4.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:09<00:00, 4.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:10<00:00, 4.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:13<00:00, 3.70it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:47:47.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 17 metrics: {'f1': 0.5, 'em': 0.5, 'acc': 0.8}\u001b[0m\n",
"\u001b[32m2026-01-10 16:47:47.857\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.62, 'em': 0.62, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 85%|████████▌ | 17/20 [36:14<06:05, 121.68s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:47:47.860\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:48:02.490\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:49:17.855\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:49:24.858\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:49:24.859\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 18 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:37, 1.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:28, 1.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:21, 2.16it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:15, 2.82it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 12%|█▏ | 6/50 [00:02<00:14, 2.99it/s]\u001b[A\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:11, 3.66it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:02<00:06, 5.87it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 24%|██▍ | 12/50 [00:03<00:07, 5.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:03<00:06, 5.91it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:03<00:07, 4.62it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:03<00:05, 6.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:04<00:05, 5.43it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:04<00:05, 5.45it/s]\u001b[A\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:04<00:03, 7.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:04<00:02, 8.97it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:05<00:03, 6.68it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:05<00:02, 7.31it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:05<00:02, 7.14it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:06<00:02, 6.38it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:06<00:03, 5.12it/s]\u001b[A\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:06<00:01, 8.52it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:06<00:01, 9.72it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:07<00:00, 10.40it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:07<00:01, 5.86it/s]\u001b[A\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:07<00:01, 5.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:08<00:00, 5.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:08<00:00, 5.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:08<00:00, 4.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:09<00:00, 3.69it/s]\u001b[A\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:09<00:00, 4.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|██████████| 50/50 [00:09<00:00, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:49:34.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 18 metrics: {'f1': 0.5, 'em': 0.5, 'acc': 0.74}\u001b[0m\n",
"\u001b[32m2026-01-10 16:49:34.336\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.62, 'em': 0.62, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 90%|█████████ | 18/20 [38:00<03:54, 117.11s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:49:34.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:49:48.687\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:51:05.996\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:51:12.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:51:12.196\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 19 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:00<00:34, 1.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:36, 1.33it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:22, 2.13it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:01<00:08, 5.23it/s]\u001b[A\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:02<00:08, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[A\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:02<00:08, 4.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 20%|██ | 10/50 [00:02<00:10, 3.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:03<00:11, 3.39it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:03<00:08, 4.37it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 28%|██▊ | 14/50 [00:03<00:08, 4.21it/s]\u001b[A\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:03<00:05, 6.12it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:03<00:04, 6.68it/s]\u001b[A\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:04<00:03, 8.62it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:04<00:03, 8.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:04<00:03, 6.77it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:04<00:02, 9.02it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 56%|█████▌ | 28/50 [00:05<00:03, 7.18it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 58%|█████▊ | 29/50 [00:05<00:02, 7.07it/s]\u001b[A\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:05<00:02, 8.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 66%|██████▌ | 33/50 [00:06<00:02, 5.77it/s]\u001b[A\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:06<00:02, 7.07it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:06<00:01, 7.58it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:06<00:01, 6.50it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:07<00:01, 6.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:07<00:01, 6.74it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:07<00:01, 7.11it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:07<00:00, 7.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:08<00:00, 5.63it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:08<00:01, 3.82it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:08<00:00, 4.20it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 96%|█████████▌| 48/50 [00:08<00:00, 4.75it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 98%|█████████▊| 49/50 [00:10<00:00, 1.73it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:10<00:00, 4.61it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-10 16:51:23.170\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 19 metrics: {'f1': 0.5, 'em': 0.5, 'acc': 0.78}\u001b[0m\n",
"\u001b[32m2026-01-10 16:51:23.171\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.62, 'em': 0.62, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 95%|█████████▌| 19/20 [39:49<01:54, 114.62s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:51:23.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-10 16:51:39.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-10 16:52:54.608\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:00.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:00.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 20 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:01<01:01, 1.26s/it]\u001b[A\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 2/50 [00:01<00:30, 1.57it/s]\u001b[A\n",
"Evaluating workflow: 6%|▌ | 3/50 [00:01<00:18, 2.55it/s]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:01<00:09, 4.83it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 14%|█▍ | 7/50 [00:02<00:14, 2.96it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 18%|█▊ | 9/50 [00:02<00:10, 3.93it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 22%|██▏ | 11/50 [00:03<00:08, 4.84it/s]\u001b[A\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:03<00:06, 5.89it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 30%|███ | 15/50 [00:03<00:05, 6.03it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 32%|███▏ | 16/50 [00:04<00:06, 5.10it/s]\u001b[A\n",
"Evaluating workflow: 34%|███▍ | 17/50 [00:04<00:05, 5.54it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 38%|███▊ | 19/50 [00:04<00:04, 6.21it/s]\u001b[A\n",
"Evaluating workflow: 42%|████▏ | 21/50 [00:04<00:03, 7.53it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 46%|████▌ | 23/50 [00:04<00:02, 9.33it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 50%|█████ | 25/50 [00:05<00:03, 7.47it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 54%|█████▍ | 27/50 [00:05<00:03, 7.35it/s]\u001b[A\n",
"Evaluating workflow: 60%|██████ | 30/50 [00:05<00:01, 10.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 64%|██████▍ | 32/50 [00:05<00:02, 7.59it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:06<00:02, 6.88it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 70%|███████ | 35/50 [00:06<00:02, 7.18it/s]\u001b[A\n",
"Evaluating workflow: 74%|███████▍ | 37/50 [00:06<00:01, 8.80it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 78%|███████▊ | 39/50 [00:06<00:01, 10.45it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 82%|████████▏ | 41/50 [00:07<00:01, 7.01it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:08<00:01, 3.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:08<00:01, 3.23it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 90%|█████████ | 45/50 [00:09<00:01, 2.69it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 92%|█████████▏| 46/50 [00:09<00:01, 2.29it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:10<00:01, 2.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:10<00:00, 4.56it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 16:53:11.999\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 20 metrics: {'f1': 0.48, 'em': 0.48, 'acc': 0.78}\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:12.001\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m291\u001b[0m - \u001b[1mMetrics are worse than the best snapshot which has {'f1': 0.62, 'em': 0.62, 'acc': 0.74}. Rolling back to the best snapshot.\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"100%|██████████| 20/20 [41:38<00:00, 124.93s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 16:53:12.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m300\u001b[0m - \u001b[1mReached the maximum number of steps 20. Optimization has finished.\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:12.005\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1204\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./MolQA_textgrad_final.json\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:12.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.workflow.workflow_graph\u001b[0m:\u001b[36msave_module\u001b[0m:\u001b[36m1204\u001b[0m - \u001b[1mSaving SequentialWorkFlowGraph to ./MolQA_textgrad_best.json\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:12.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mrestore_best_graph\u001b[0m:\u001b[36m448\u001b[0m - \u001b[1mRestored the best graph from snapshot with metrics {'f1': 0.62, 'em': 0.62, 'acc': 0.74}\u001b[0m\n",
"\u001b[32m2026-01-10 16:53:12.009\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m10\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 1/3000 [00:01<1:09:09, 1.38s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 5/3000 [00:01<11:58, 4.17it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 7/3000 [00:03<21:32, 2.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 12/3000 [00:03<10:11, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 16/3000 [00:03<07:50, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 19/3000 [00:04<07:50, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 23/3000 [00:05<07:20, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 25/3000 [00:05<06:10, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 27/3000 [00:05<07:15, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 29/3000 [00:06<09:28, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 31/3000 [00:06<09:49, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 34/3000 [00:07<09:08, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 36/3000 [00:07<09:24, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 38/3000 [00:08<10:26, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 42/3000 [00:08<07:28, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 45/3000 [00:08<07:37, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 47/3000 [00:09<06:04, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 51/3000 [00:09<05:29, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 53/3000 [00:09<05:11, 9.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 56/3000 [00:09<04:59, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 58/3000 [00:10<07:00, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 59/3000 [00:10<08:55, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 60/3000 [00:11<10:26, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 62/3000 [00:11<10:39, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 63/3000 [00:11<10:59, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 64/3000 [00:12<11:47, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 67/3000 [00:12<07:52, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 72/3000 [00:12<05:20, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 74/3000 [00:13<06:18, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 76/3000 [00:13<06:29, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 78/3000 [00:13<05:46, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 81/3000 [00:14<08:15, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 84/3000 [00:14<05:37, 8.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 86/3000 [00:14<06:25, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 88/3000 [00:15<07:31, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 92/3000 [00:16<07:48, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 94/3000 [00:16<09:41, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 95/3000 [00:16<11:02, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 97/3000 [00:17<09:57, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 99/3000 [00:17<07:04, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 102/3000 [00:17<05:43, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:17<04:47, 10.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:18<05:52, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 108/3000 [00:18<07:08, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:18<06:18, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 112/3000 [00:19<09:18, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 114/3000 [00:19<08:20, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 117/3000 [00:20<07:36, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 120/3000 [00:20<06:19, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:20<08:46, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:21<06:48, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 128/3000 [00:21<05:46, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 133/3000 [00:22<06:40, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 137/3000 [00:22<05:42, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 141/3000 [00:23<08:39, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 142/3000 [00:23<08:12, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 148/3000 [00:24<05:38, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 152/3000 [00:25<07:26, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 154/3000 [00:25<06:05, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 159/3000 [00:25<04:34, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:26<04:27, 10.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 163/3000 [00:26<06:50, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 165/3000 [00:26<06:57, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 166/3000 [00:27<07:43, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 167/3000 [00:27<09:02, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 170/3000 [00:27<07:15, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 171/3000 [00:28<09:54, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 172/3000 [00:28<11:43, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 179/3000 [00:29<05:47, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 183/3000 [00:29<05:36, 8.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 186/3000 [00:30<07:52, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 188/3000 [00:30<06:55, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 189/3000 [00:30<08:03, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 190/3000 [00:31<08:34, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 193/3000 [00:31<07:21, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 197/3000 [00:32<06:38, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 199/3000 [00:32<07:16, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 204/3000 [00:33<05:49, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 206/3000 [00:33<05:48, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [00:33<06:26, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 209/3000 [00:33<06:49, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 213/3000 [00:34<06:01, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 215/3000 [00:34<06:27, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 218/3000 [00:34<04:59, 9.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 219/3000 [00:35<06:50, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 220/3000 [00:35<07:47, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 223/3000 [00:35<07:02, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 226/3000 [00:36<05:40, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 227/3000 [00:36<07:31, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 228/3000 [00:36<09:20, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 231/3000 [00:37<06:57, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 238/3000 [00:37<05:16, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 240/3000 [00:38<06:49, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 243/3000 [00:38<06:46, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 246/3000 [00:39<05:21, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 248/3000 [00:39<05:47, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 250/3000 [00:39<06:42, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 251/3000 [00:40<08:31, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 255/3000 [00:40<06:12, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 257/3000 [00:40<05:17, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 259/3000 [00:41<05:59, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 261/3000 [00:41<07:35, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 263/3000 [00:42<08:35, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 265/3000 [00:42<06:04, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 267/3000 [00:42<07:15, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 269/3000 [00:42<06:51, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 273/3000 [00:43<04:30, 10.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [00:43<03:32, 12.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 280/3000 [00:43<05:30, 8.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 282/3000 [00:44<07:50, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 284/3000 [00:44<07:30, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 286/3000 [00:45<07:14, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 287/3000 [00:45<09:05, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 291/3000 [00:45<06:30, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 293/3000 [00:46<08:10, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 296/3000 [00:46<05:26, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 298/3000 [00:47<06:23, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 301/3000 [00:47<05:48, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 304/3000 [00:47<05:00, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [00:48<07:35, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 310/3000 [00:48<06:30, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 311/3000 [00:49<08:14, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 313/3000 [00:49<07:19, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [00:49<07:06, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 317/3000 [00:50<07:23, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 320/3000 [00:50<07:24, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 322/3000 [00:50<06:05, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 323/3000 [00:50<06:08, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 324/3000 [00:51<09:37, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 326/3000 [00:51<09:09, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 331/3000 [00:52<06:15, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 333/3000 [00:52<06:33, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 335/3000 [00:52<04:57, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 337/3000 [00:52<05:15, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [00:53<06:16, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 341/3000 [00:53<06:52, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 344/3000 [00:54<08:02, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 347/3000 [00:54<06:00, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 350/3000 [00:54<05:17, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 351/3000 [00:55<05:36, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 352/3000 [00:55<07:00, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 357/3000 [00:56<07:39, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 363/3000 [00:56<05:23, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 365/3000 [00:57<05:50, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 366/3000 [00:57<06:55, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 367/3000 [00:57<09:57, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 369/3000 [00:58<09:02, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 371/3000 [00:58<07:28, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 373/3000 [00:58<08:37, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [00:59<09:49, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 379/3000 [00:59<05:14, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 383/3000 [00:59<03:41, 11.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 387/3000 [01:00<05:20, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 389/3000 [01:00<05:36, 7.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 391/3000 [01:01<04:57, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 393/3000 [01:01<05:17, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 394/3000 [01:01<05:28, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 395/3000 [01:01<08:13, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 397/3000 [01:02<10:12, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:03<07:37, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 403/3000 [01:03<07:22, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 404/3000 [01:03<08:56, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 405/3000 [01:04<09:15, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:04<06:45, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 412/3000 [01:04<05:36, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 415/3000 [01:05<04:46, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 419/3000 [01:05<04:37, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 421/3000 [01:05<05:54, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 422/3000 [01:06<06:25, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 423/3000 [01:06<07:03, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 425/3000 [01:06<07:29, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 427/3000 [01:07<08:42, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 431/3000 [01:07<06:05, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 432/3000 [01:08<07:54, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 435/3000 [01:08<07:50, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 438/3000 [01:09<07:11, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 441/3000 [01:09<06:30, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 445/3000 [01:09<05:09, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 448/3000 [01:10<04:21, 9.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 452/3000 [01:10<04:27, 9.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 454/3000 [01:10<04:45, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 456/3000 [01:11<07:50, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 457/3000 [01:11<08:09, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 460/3000 [01:12<07:07, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 462/3000 [01:12<06:42, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 464/3000 [01:12<04:56, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 466/3000 [01:12<05:28, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 468/3000 [01:13<05:57, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 471/3000 [01:13<06:39, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 472/3000 [01:13<06:19, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 475/3000 [01:14<06:19, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 477/3000 [01:14<06:48, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 478/3000 [01:15<08:30, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 482/3000 [01:15<06:03, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 485/3000 [01:15<05:08, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 490/3000 [01:16<05:01, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 492/3000 [01:17<08:15, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 494/3000 [01:17<06:34, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 497/3000 [01:17<06:12, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 499/3000 [01:18<06:10, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 501/3000 [01:18<07:40, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 502/3000 [01:18<07:35, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 505/3000 [01:19<05:52, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 507/3000 [01:19<05:57, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 509/3000 [01:19<07:06, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 511/3000 [01:20<06:46, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [01:20<07:51, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 516/3000 [01:20<04:55, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 518/3000 [01:20<04:08, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 520/3000 [01:21<04:53, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 522/3000 [01:21<06:44, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 524/3000 [01:22<06:18, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 526/3000 [01:22<07:21, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 529/3000 [01:23<07:33, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 530/3000 [01:23<07:00, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 532/3000 [01:23<07:49, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 533/3000 [01:24<08:28, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 538/3000 [01:24<04:41, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 539/3000 [01:24<06:28, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 543/3000 [01:25<05:48, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 544/3000 [01:25<08:15, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 547/3000 [01:26<07:03, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [01:26<06:52, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 551/3000 [01:26<05:47, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 553/3000 [01:27<06:27, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 555/3000 [01:27<05:59, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 560/3000 [01:27<03:25, 11.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 562/3000 [01:28<06:40, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 565/3000 [01:28<06:27, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 567/3000 [01:29<07:00, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 569/3000 [01:29<08:02, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 573/3000 [01:29<04:05, 9.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 575/3000 [01:30<04:26, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 577/3000 [01:30<05:35, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 581/3000 [01:31<05:03, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 583/3000 [01:31<06:13, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 584/3000 [01:31<07:17, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [01:32<07:47, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 588/3000 [01:32<06:29, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 590/3000 [01:32<06:55, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 594/3000 [01:33<04:40, 8.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 595/3000 [01:33<04:41, 8.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 597/3000 [01:33<05:17, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 599/3000 [01:34<07:33, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 602/3000 [01:34<04:54, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 604/3000 [01:34<05:49, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [01:34<05:22, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 608/3000 [01:35<04:46, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 610/3000 [01:35<04:54, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 613/3000 [01:36<05:51, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 614/3000 [01:36<07:39, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 618/3000 [01:37<07:06, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 619/3000 [01:37<06:38, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 622/3000 [01:37<05:59, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 624/3000 [01:37<04:49, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 626/3000 [01:38<04:40, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [01:38<07:33, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 630/3000 [01:38<05:55, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 631/3000 [01:39<06:27, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 633/3000 [01:39<07:46, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 634/3000 [01:39<06:59, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 638/3000 [01:40<04:42, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 640/3000 [01:40<03:57, 9.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 642/3000 [01:40<04:03, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 644/3000 [01:40<05:28, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [01:41<07:11, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 648/3000 [01:42<08:16, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 650/3000 [01:42<06:47, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 654/3000 [01:42<04:31, 8.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 657/3000 [01:42<03:38, 10.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 659/3000 [01:43<03:39, 10.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 662/3000 [01:44<07:20, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 665/3000 [01:44<06:49, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 669/3000 [01:44<04:30, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 670/3000 [01:45<05:31, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 671/3000 [01:45<06:26, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 672/3000 [01:45<07:56, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 673/3000 [01:45<08:35, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 674/3000 [01:46<08:51, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 676/3000 [01:46<06:51, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 680/3000 [01:46<05:09, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 681/3000 [01:47<05:59, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [01:47<06:36, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 686/3000 [01:47<05:55, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 688/3000 [01:48<06:12, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 691/3000 [01:48<04:43, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 692/3000 [01:49<11:11, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 695/3000 [01:49<08:17, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 697/3000 [01:50<07:05, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 699/3000 [01:50<05:00, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 704/3000 [01:50<03:31, 10.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 706/3000 [01:51<04:59, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 707/3000 [01:51<06:04, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 709/3000 [01:51<06:33, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 712/3000 [01:52<05:14, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 713/3000 [01:52<06:04, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 716/3000 [01:52<06:13, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 717/3000 [01:53<07:33, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 719/3000 [01:53<08:42, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 720/3000 [01:54<08:48, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 722/3000 [01:54<07:12, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 724/3000 [01:54<06:02, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 726/3000 [01:54<06:24, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 727/3000 [01:55<06:42, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 730/3000 [01:55<06:54, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 734/3000 [01:56<04:36, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 737/3000 [01:56<03:54, 9.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 739/3000 [01:56<04:01, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 741/3000 [01:57<07:56, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 743/3000 [01:57<06:49, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 744/3000 [01:57<07:02, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 746/3000 [01:58<06:09, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 750/3000 [01:58<05:47, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 751/3000 [01:58<05:36, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 753/3000 [01:59<05:03, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 755/3000 [01:59<06:50, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 757/3000 [01:59<05:20, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 760/3000 [02:00<05:12, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 763/3000 [02:00<04:30, 8.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 764/3000 [02:00<04:38, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 766/3000 [02:01<06:29, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 769/3000 [02:01<05:46, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 772/3000 [02:02<06:25, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 774/3000 [02:02<06:30, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 775/3000 [02:02<07:16, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 777/3000 [02:03<07:35, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 781/3000 [02:03<05:21, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 783/3000 [02:03<04:35, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 785/3000 [02:04<04:18, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 788/3000 [02:04<04:52, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 789/3000 [02:04<04:49, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 792/3000 [02:05<04:59, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 793/3000 [02:05<06:33, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 796/3000 [02:06<08:10, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 797/3000 [02:06<08:27, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 799/3000 [02:06<06:48, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 801/3000 [02:07<07:49, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 804/3000 [02:07<04:50, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 806/3000 [02:08<06:32, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 808/3000 [02:08<06:16, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 810/3000 [02:08<06:23, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 813/3000 [02:09<05:14, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 815/3000 [02:09<04:33, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 817/3000 [02:09<05:25, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 819/3000 [02:09<04:30, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 822/3000 [02:10<05:30, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 825/3000 [02:10<04:53, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 830/3000 [02:11<04:02, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 832/3000 [02:11<05:55, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 837/3000 [02:12<06:39, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 839/3000 [02:13<06:16, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 841/3000 [02:13<05:47, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 843/3000 [02:14<06:52, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 844/3000 [02:14<06:32, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 848/3000 [02:14<04:38, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 849/3000 [02:14<05:46, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 850/3000 [02:15<07:44, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 855/3000 [02:15<05:48, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 856/3000 [02:16<07:03, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 858/3000 [02:16<07:15, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 859/3000 [02:16<06:51, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 862/3000 [02:17<05:42, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 864/3000 [02:17<06:51, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 866/3000 [02:18<07:16, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 867/3000 [02:18<07:36, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 871/3000 [02:18<04:41, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 872/3000 [02:19<05:11, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 875/3000 [02:19<05:58, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 878/3000 [02:20<05:45, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 880/3000 [02:20<06:03, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [02:20<06:34, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 885/3000 [02:21<05:17, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [02:21<05:09, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 889/3000 [02:21<05:49, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 890/3000 [02:22<06:28, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 893/3000 [02:22<06:09, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 894/3000 [02:23<07:00, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 895/3000 [02:23<07:22, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 897/3000 [02:23<06:47, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 900/3000 [02:24<05:56, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 901/3000 [02:24<05:30, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 904/3000 [02:24<04:34, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 908/3000 [02:24<03:41, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 910/3000 [02:25<05:27, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 913/3000 [02:25<04:37, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 915/3000 [02:26<04:22, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 917/3000 [02:26<04:27, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 919/3000 [02:26<06:07, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 920/3000 [02:27<08:16, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 921/3000 [02:27<08:22, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 923/3000 [02:27<06:25, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 924/3000 [02:27<06:46, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 927/3000 [02:28<04:45, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 929/3000 [02:28<05:19, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 930/3000 [02:28<05:14, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 932/3000 [02:29<06:21, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 937/3000 [02:29<04:28, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 940/3000 [02:30<05:16, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 942/3000 [02:30<05:02, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 946/3000 [02:31<04:37, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 947/3000 [02:31<04:42, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 949/3000 [02:31<05:48, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 950/3000 [02:31<06:14, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 953/3000 [02:32<06:14, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 955/3000 [02:33<07:34, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 957/3000 [02:33<05:23, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 959/3000 [02:33<05:22, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 962/3000 [02:33<04:27, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 966/3000 [02:34<05:47, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 967/3000 [02:35<08:30, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [02:35<06:31, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 971/3000 [02:35<07:23, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 974/3000 [02:36<05:32, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 977/3000 [02:36<05:13, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 980/3000 [02:37<05:05, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 983/3000 [02:37<03:27, 9.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 985/3000 [02:37<03:41, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 987/3000 [02:38<05:08, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 989/3000 [02:38<07:59, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 992/3000 [02:39<05:02, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 994/3000 [02:39<05:34, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 998/3000 [02:39<04:24, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1000/3000 [02:40<06:06, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1003/3000 [02:40<05:12, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1004/3000 [02:41<05:57, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1008/3000 [02:42<06:03, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1010/3000 [02:42<04:48, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1011/3000 [02:42<05:23, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1014/3000 [02:42<05:25, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1015/3000 [02:43<05:40, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1018/3000 [02:43<04:37, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [02:43<05:34, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1020/3000 [02:43<05:52, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1021/3000 [02:44<06:58, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1023/3000 [02:44<06:03, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1025/3000 [02:44<05:10, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1028/3000 [02:45<05:24, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1029/3000 [02:45<04:59, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1031/3000 [02:45<04:26, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1032/3000 [02:46<07:29, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1036/3000 [02:46<05:12, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1038/3000 [02:47<05:26, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1040/3000 [02:47<05:50, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1041/3000 [02:48<08:32, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1043/3000 [02:48<06:59, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1047/3000 [02:48<04:40, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1050/3000 [02:49<05:04, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1052/3000 [02:49<04:53, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1055/3000 [02:49<03:48, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1056/3000 [02:49<03:42, 8.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1058/3000 [02:50<05:52, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1060/3000 [02:50<04:42, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1063/3000 [02:51<03:52, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1065/3000 [02:51<03:28, 9.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1067/3000 [02:52<07:06, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1068/3000 [02:52<06:19, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1071/3000 [02:52<04:42, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1072/3000 [02:52<05:07, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1075/3000 [02:53<04:15, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1079/3000 [02:54<06:42, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1082/3000 [02:54<04:36, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1086/3000 [02:55<04:09, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1089/3000 [02:55<04:54, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1090/3000 [02:56<07:11, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1092/3000 [02:56<06:32, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1095/3000 [02:56<04:35, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1099/3000 [02:57<04:54, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1104/3000 [02:58<04:12, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1106/3000 [02:58<04:27, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1108/3000 [02:58<05:07, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1111/3000 [02:59<05:43, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1117/3000 [03:00<03:51, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1119/3000 [03:00<04:08, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1120/3000 [03:00<05:34, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1122/3000 [03:01<05:38, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1123/3000 [03:01<08:15, 3.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1125/3000 [03:02<06:54, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1126/3000 [03:02<06:54, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1129/3000 [03:02<05:52, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1131/3000 [03:03<05:15, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1133/3000 [03:03<04:39, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1135/3000 [03:03<04:36, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1137/3000 [03:03<04:45, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1141/3000 [03:04<03:27, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1143/3000 [03:04<03:46, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1145/3000 [03:05<06:56, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1151/3000 [03:06<04:22, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1153/3000 [03:06<04:10, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1155/3000 [03:06<04:02, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1157/3000 [03:07<04:58, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1161/3000 [03:08<05:25, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1162/3000 [03:08<05:40, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1166/3000 [03:08<04:24, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [03:08<03:44, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1170/3000 [03:10<07:53, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1172/3000 [03:10<06:03, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1175/3000 [03:10<05:16, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1176/3000 [03:11<06:39, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1178/3000 [03:11<05:39, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1180/3000 [03:11<06:45, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1183/3000 [03:12<04:06, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1185/3000 [03:12<03:11, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1187/3000 [03:12<04:26, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1188/3000 [03:13<05:34, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1191/3000 [03:13<05:11, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1192/3000 [03:14<07:36, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1199/3000 [03:14<03:41, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1202/3000 [03:15<04:45, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1203/3000 [03:15<04:50, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1204/3000 [03:15<06:25, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [03:16<06:52, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1209/3000 [03:16<04:20, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1210/3000 [03:16<04:18, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1213/3000 [03:17<04:26, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1214/3000 [03:17<04:27, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1218/3000 [03:17<03:35, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1221/3000 [03:18<04:04, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [03:18<03:54, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1223/3000 [03:18<05:41, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1224/3000 [03:19<07:00, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1225/3000 [03:19<06:45, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1230/3000 [03:19<03:40, 8.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1234/3000 [03:20<04:03, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1236/3000 [03:20<03:38, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1238/3000 [03:20<03:29, 8.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1239/3000 [03:21<04:54, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1241/3000 [03:21<04:49, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1246/3000 [03:22<03:06, 9.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1249/3000 [03:22<03:42, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1251/3000 [03:22<04:25, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1253/3000 [03:23<05:33, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1255/3000 [03:24<06:36, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1256/3000 [03:24<07:22, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1259/3000 [03:24<06:08, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1260/3000 [03:25<05:23, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1261/3000 [03:25<05:40, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1264/3000 [03:25<04:36, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1265/3000 [03:25<04:12, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1266/3000 [03:26<06:16, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1271/3000 [03:26<03:08, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1273/3000 [03:26<03:18, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1276/3000 [03:27<04:09, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1278/3000 [03:28<06:27, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1281/3000 [03:28<04:59, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1283/3000 [03:28<05:08, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1285/3000 [03:29<05:39, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1286/3000 [03:29<07:28, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1291/3000 [03:30<04:04, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1293/3000 [03:30<03:30, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1296/3000 [03:31<04:15, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1297/3000 [03:31<04:37, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1298/3000 [03:31<05:06, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1300/3000 [03:31<04:38, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1302/3000 [03:32<04:34, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1306/3000 [03:32<03:56, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1309/3000 [03:33<06:38, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1312/3000 [03:34<05:05, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1314/3000 [03:34<05:07, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1317/3000 [03:35<05:48, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1320/3000 [03:35<05:06, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1322/3000 [03:36<05:18, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1328/3000 [03:36<02:48, 9.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1330/3000 [03:36<02:42, 10.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1332/3000 [03:37<03:56, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1335/3000 [03:37<03:38, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1339/3000 [03:38<03:08, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1341/3000 [03:38<03:10, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1343/3000 [03:39<06:07, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1348/3000 [03:39<03:54, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1352/3000 [03:40<04:26, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1353/3000 [03:40<05:09, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1356/3000 [03:41<04:47, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1359/3000 [03:41<04:04, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [03:41<03:06, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1364/3000 [03:42<03:45, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1365/3000 [03:42<03:45, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1366/3000 [03:42<04:47, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1369/3000 [03:42<03:39, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1370/3000 [03:43<04:08, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1373/3000 [03:43<03:43, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1374/3000 [03:44<07:55, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1376/3000 [03:44<06:27, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1380/3000 [03:45<04:24, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1385/3000 [03:45<02:42, 9.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1387/3000 [03:46<05:09, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1389/3000 [03:46<05:17, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1393/3000 [03:47<03:41, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1395/3000 [03:47<04:43, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1398/3000 [03:48<04:59, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1399/3000 [03:48<05:15, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1401/3000 [03:49<06:12, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [03:49<04:23, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1407/3000 [03:49<04:02, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1408/3000 [03:50<03:56, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1409/3000 [03:50<06:08, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1413/3000 [03:50<03:56, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1415/3000 [03:51<03:37, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1417/3000 [03:51<03:40, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1419/3000 [03:51<03:50, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1420/3000 [03:51<04:16, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1422/3000 [03:52<05:49, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1425/3000 [03:53<04:30, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1427/3000 [03:53<03:51, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1429/3000 [03:53<03:32, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1431/3000 [03:53<03:19, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1434/3000 [03:54<04:39, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1436/3000 [03:54<04:03, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1439/3000 [03:55<03:22, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1443/3000 [03:55<02:26, 10.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1445/3000 [03:55<02:24, 10.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1447/3000 [03:57<07:38, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1452/3000 [03:57<04:09, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1454/3000 [03:57<03:31, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1456/3000 [03:58<03:31, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1458/3000 [03:58<04:34, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1461/3000 [03:59<04:32, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1465/3000 [03:59<03:59, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [03:59<04:02, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1468/3000 [04:00<03:37, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1469/3000 [04:00<05:28, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1471/3000 [04:00<04:45, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1473/3000 [04:01<05:14, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1474/3000 [04:01<06:21, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1476/3000 [04:02<05:38, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1477/3000 [04:02<05:26, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1482/3000 [04:02<03:08, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1484/3000 [04:02<02:34, 9.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [04:03<02:37, 9.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1489/3000 [04:03<03:16, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1490/3000 [04:03<04:01, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1491/3000 [04:04<05:21, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1495/3000 [04:04<03:15, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1497/3000 [04:04<02:59, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1500/3000 [04:05<05:24, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1505/3000 [04:06<02:48, 8.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1507/3000 [04:07<06:13, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1511/3000 [04:07<04:26, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1514/3000 [04:08<04:11, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1515/3000 [04:08<04:08, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1518/3000 [04:08<03:46, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1520/3000 [04:09<03:06, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1521/3000 [04:09<04:19, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1525/3000 [04:09<03:19, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1528/3000 [04:10<02:43, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1531/3000 [04:10<03:15, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1533/3000 [04:11<04:58, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1534/3000 [04:11<05:11, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1540/3000 [04:12<02:49, 8.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1543/3000 [04:12<04:23, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1545/3000 [04:13<03:45, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1549/3000 [04:13<02:33, 9.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [04:13<02:27, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1553/3000 [04:14<02:55, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1555/3000 [04:14<04:03, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1558/3000 [04:14<03:17, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1563/3000 [04:15<02:31, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1565/3000 [04:15<02:31, 9.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1567/3000 [04:16<05:29, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1571/3000 [04:16<03:15, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1573/3000 [04:17<03:31, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▎ | 1575/3000 [04:17<04:40, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1580/3000 [04:18<03:02, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1583/3000 [04:18<02:31, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1586/3000 [04:18<02:30, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1589/3000 [04:19<03:47, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1591/3000 [04:19<03:01, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1594/3000 [04:20<03:50, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1596/3000 [04:20<03:26, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1597/3000 [04:20<03:47, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [04:20<03:16, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1603/3000 [04:21<02:36, 8.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1605/3000 [04:21<03:00, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1606/3000 [04:21<04:07, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1610/3000 [04:22<03:37, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1613/3000 [04:22<03:21, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1615/3000 [04:23<02:54, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1618/3000 [04:23<03:12, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1620/3000 [04:23<03:05, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1622/3000 [04:24<03:12, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1624/3000 [04:24<02:58, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1625/3000 [04:24<03:30, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1627/3000 [04:24<03:06, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1628/3000 [04:25<04:22, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1629/3000 [04:25<04:43, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1630/3000 [04:25<05:18, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1632/3000 [04:26<04:41, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1634/3000 [04:26<03:21, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1636/3000 [04:26<03:18, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [04:27<03:51, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1641/3000 [04:27<03:19, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1644/3000 [04:27<03:23, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1650/3000 [04:28<01:52, 11.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1652/3000 [04:28<01:57, 11.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1654/3000 [04:28<02:16, 9.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1656/3000 [04:29<03:52, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1658/3000 [04:29<04:19, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1659/3000 [04:30<04:08, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1661/3000 [04:30<03:42, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1663/3000 [04:30<03:26, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1665/3000 [04:31<03:41, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1669/3000 [04:31<02:40, 8.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1673/3000 [04:31<02:20, 9.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1675/3000 [04:32<02:24, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1678/3000 [04:32<02:27, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1681/3000 [04:32<02:19, 9.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1682/3000 [04:32<02:46, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1683/3000 [04:33<05:03, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1684/3000 [04:33<05:39, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1685/3000 [04:34<05:37, 3.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1688/3000 [04:34<04:18, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1690/3000 [04:34<03:34, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1691/3000 [04:34<03:21, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1693/3000 [04:35<03:44, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1697/3000 [04:35<02:00, 10.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1699/3000 [04:35<02:52, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1702/3000 [04:36<03:45, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [04:36<03:14, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1706/3000 [04:37<03:29, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1708/3000 [04:37<03:32, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1712/3000 [04:38<02:39, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1715/3000 [04:38<03:05, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1718/3000 [04:39<03:21, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1720/3000 [04:39<02:59, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1722/3000 [04:39<02:55, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1724/3000 [04:39<02:46, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1727/3000 [04:40<03:18, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1731/3000 [04:40<02:15, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1735/3000 [04:41<02:03, 10.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1739/3000 [04:42<03:33, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [04:42<03:02, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1745/3000 [04:42<02:39, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1747/3000 [04:43<02:13, 9.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1751/3000 [04:43<02:32, 8.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1753/3000 [04:43<02:48, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1754/3000 [04:44<02:49, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1756/3000 [04:44<03:19, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1760/3000 [04:45<03:26, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1761/3000 [04:45<03:16, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1762/3000 [04:45<03:36, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [04:46<03:14, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1767/3000 [04:46<02:27, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1769/3000 [04:46<03:00, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1776/3000 [04:47<01:48, 11.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1778/3000 [04:47<01:57, 10.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1780/3000 [04:48<03:11, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1783/3000 [04:48<03:03, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1784/3000 [04:48<03:04, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1786/3000 [04:48<03:04, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1789/3000 [04:49<04:27, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1792/3000 [04:50<03:37, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1795/3000 [04:50<02:44, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1797/3000 [04:50<02:39, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1802/3000 [04:51<02:05, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1804/3000 [04:51<01:51, 10.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1806/3000 [04:51<02:03, 9.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1808/3000 [04:52<02:20, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1809/3000 [04:52<02:26, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1810/3000 [04:52<02:55, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1812/3000 [04:52<03:45, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1813/3000 [04:53<05:09, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1815/3000 [04:53<04:26, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1816/3000 [04:54<04:00, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1817/3000 [04:54<05:01, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1821/3000 [04:54<03:24, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1827/3000 [04:55<01:58, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1829/3000 [04:55<02:22, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1831/3000 [04:55<02:27, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1834/3000 [04:56<02:25, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1836/3000 [04:56<02:32, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1837/3000 [04:56<03:01, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1838/3000 [04:57<03:19, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1839/3000 [04:57<03:36, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1842/3000 [04:57<02:36, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1843/3000 [04:58<04:41, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1844/3000 [04:58<05:42, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1846/3000 [04:59<04:22, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1848/3000 [04:59<03:40, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1851/3000 [04:59<03:31, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1852/3000 [05:00<03:13, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1853/3000 [05:00<03:29, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1857/3000 [05:00<02:30, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1859/3000 [05:00<02:07, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1861/3000 [05:01<03:07, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1864/3000 [05:01<02:35, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1866/3000 [05:02<02:45, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1868/3000 [05:02<04:06, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1871/3000 [05:02<02:35, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1873/3000 [05:03<02:20, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1874/3000 [05:03<02:49, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1876/3000 [05:03<02:33, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1877/3000 [05:04<04:08, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1880/3000 [05:04<03:22, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1882/3000 [05:04<02:32, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1884/3000 [05:05<02:47, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1887/3000 [05:05<02:48, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1890/3000 [05:05<02:27, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1892/3000 [05:06<02:12, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1894/3000 [05:06<02:17, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1897/3000 [05:06<02:42, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1899/3000 [05:07<02:54, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1901/3000 [05:07<02:58, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1904/3000 [05:07<01:59, 9.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1906/3000 [05:08<03:19, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1908/3000 [05:08<03:27, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1912/3000 [05:09<02:01, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [05:09<01:52, 9.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1916/3000 [05:09<02:02, 8.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1918/3000 [05:10<02:45, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1921/3000 [05:10<02:43, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1925/3000 [05:11<02:27, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1926/3000 [05:11<02:37, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1929/3000 [05:11<02:24, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1931/3000 [05:11<02:19, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1933/3000 [05:12<02:41, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1935/3000 [05:12<02:49, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1937/3000 [05:12<02:33, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1938/3000 [05:13<03:05, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1940/3000 [05:13<03:17, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1942/3000 [05:13<02:54, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1944/3000 [05:14<02:52, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1945/3000 [05:14<03:14, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1950/3000 [05:15<02:31, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1955/3000 [05:15<01:42, 10.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1957/3000 [05:15<02:13, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1959/3000 [05:16<02:28, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1961/3000 [05:16<02:09, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1963/3000 [05:16<02:28, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1966/3000 [05:17<02:25, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1967/3000 [05:17<03:38, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1969/3000 [05:18<03:31, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1971/3000 [05:18<03:08, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1974/3000 [05:18<02:39, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1975/3000 [05:19<02:29, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1978/3000 [05:19<02:07, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1979/3000 [05:19<02:53, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1982/3000 [05:20<02:58, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1984/3000 [05:20<03:39, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1987/3000 [05:21<02:28, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1991/3000 [05:21<01:53, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1993/3000 [05:21<01:48, 9.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1995/3000 [05:21<01:49, 9.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1998/3000 [05:22<01:47, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2000/3000 [05:22<02:19, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2003/3000 [05:23<02:43, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2005/3000 [05:23<03:20, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2008/3000 [05:24<02:35, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2009/3000 [05:24<02:46, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2010/3000 [05:24<02:52, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2012/3000 [05:25<03:48, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2015/3000 [05:25<02:40, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2017/3000 [05:25<02:29, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2020/3000 [05:26<02:06, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [05:26<02:03, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2023/3000 [05:26<02:34, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2026/3000 [05:27<02:28, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2028/3000 [05:27<02:47, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2029/3000 [05:27<02:33, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2033/3000 [05:28<02:00, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [05:28<01:57, 8.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2042/3000 [05:28<01:15, 12.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2046/3000 [05:29<02:08, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2048/3000 [05:30<03:48, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2050/3000 [05:31<03:19, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2052/3000 [05:31<03:33, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2056/3000 [05:32<02:31, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2058/3000 [05:32<02:03, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2063/3000 [05:32<01:50, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2066/3000 [05:32<01:23, 11.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2068/3000 [05:33<01:31, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2070/3000 [05:33<01:50, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2072/3000 [05:34<03:02, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2077/3000 [05:35<02:42, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2078/3000 [05:35<02:53, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2082/3000 [05:36<02:41, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2086/3000 [05:36<01:48, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2088/3000 [05:36<01:50, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2090/3000 [05:36<01:53, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2092/3000 [05:37<01:56, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2093/3000 [05:37<02:37, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2098/3000 [05:38<01:44, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2100/3000 [05:38<02:27, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2102/3000 [05:38<02:24, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2103/3000 [05:39<03:10, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2106/3000 [05:39<02:34, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2107/3000 [05:39<02:28, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2109/3000 [05:40<02:28, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2111/3000 [05:40<02:43, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2115/3000 [05:40<01:25, 10.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2117/3000 [05:41<01:19, 11.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2119/3000 [05:41<01:58, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2124/3000 [05:42<01:57, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2126/3000 [05:42<02:15, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2128/3000 [05:42<01:49, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2131/3000 [05:43<02:44, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2132/3000 [05:44<03:09, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2135/3000 [05:44<02:36, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2137/3000 [05:44<02:50, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2140/3000 [05:45<02:34, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2141/3000 [05:45<02:47, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2145/3000 [05:46<02:02, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2147/3000 [05:46<01:49, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2148/3000 [05:46<02:10, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2150/3000 [05:46<01:58, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2152/3000 [05:47<02:19, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2153/3000 [05:47<03:05, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2154/3000 [05:47<03:06, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2156/3000 [05:47<02:26, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2158/3000 [05:48<02:15, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2161/3000 [05:48<02:11, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2162/3000 [05:48<02:28, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2164/3000 [05:49<02:13, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2168/3000 [05:49<01:41, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2170/3000 [05:50<02:37, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2171/3000 [05:50<02:35, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2174/3000 [05:50<02:11, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2177/3000 [05:51<01:50, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2179/3000 [05:51<02:11, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2182/3000 [05:52<01:51, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2183/3000 [05:52<01:54, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2186/3000 [05:52<01:54, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2187/3000 [05:52<01:49, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2190/3000 [05:53<02:37, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2191/3000 [05:53<03:01, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2194/3000 [05:54<02:16, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2195/3000 [05:54<02:34, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2197/3000 [05:54<02:22, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [05:54<02:29, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2199/3000 [05:55<03:10, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2200/3000 [05:55<03:27, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2202/3000 [05:56<03:19, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [05:56<02:06, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2206/3000 [05:56<02:04, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2210/3000 [05:56<01:27, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2212/3000 [05:57<01:18, 10.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2214/3000 [05:57<01:42, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2219/3000 [05:58<01:57, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2223/3000 [05:58<01:17, 9.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2225/3000 [05:59<02:34, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2227/3000 [06:00<02:36, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2229/3000 [06:00<02:16, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2233/3000 [06:00<01:53, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2235/3000 [06:00<01:35, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2238/3000 [06:01<01:42, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2241/3000 [06:01<01:49, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2243/3000 [06:02<01:47, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2244/3000 [06:02<01:57, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2246/3000 [06:02<01:45, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2247/3000 [06:03<02:23, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2249/3000 [06:03<02:30, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2252/3000 [06:03<02:01, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2255/3000 [06:04<01:29, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2257/3000 [06:04<01:31, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2259/3000 [06:04<01:41, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [06:05<02:07, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2263/3000 [06:05<02:26, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2266/3000 [06:06<01:38, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2268/3000 [06:06<02:24, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2272/3000 [06:07<01:58, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2276/3000 [06:07<01:48, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2278/3000 [06:08<01:42, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2280/3000 [06:08<01:57, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2282/3000 [06:08<01:58, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2283/3000 [06:08<01:46, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2285/3000 [06:09<01:51, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2286/3000 [06:09<02:03, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2289/3000 [06:10<02:37, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2291/3000 [06:10<01:52, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2294/3000 [06:10<01:27, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [06:10<01:26, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2298/3000 [06:11<01:40, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2300/3000 [06:11<01:38, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2303/3000 [06:11<01:32, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2304/3000 [06:12<01:37, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2306/3000 [06:12<02:00, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2307/3000 [06:13<03:22, 3.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2311/3000 [06:13<01:53, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [06:14<02:25, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2316/3000 [06:14<01:55, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2318/3000 [06:14<01:38, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2321/3000 [06:15<01:16, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2323/3000 [06:15<01:16, 8.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2327/3000 [06:15<01:19, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2329/3000 [06:16<02:06, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [06:16<01:32, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2333/3000 [06:17<02:09, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2335/3000 [06:17<02:30, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2338/3000 [06:18<02:22, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2341/3000 [06:18<01:37, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2343/3000 [06:18<01:37, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2347/3000 [06:19<01:07, 9.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2350/3000 [06:19<01:33, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2352/3000 [06:20<01:32, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2355/3000 [06:20<01:30, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2357/3000 [06:20<01:24, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2361/3000 [06:21<01:40, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2365/3000 [06:22<01:39, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2369/3000 [06:22<01:12, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2371/3000 [06:22<01:20, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2373/3000 [06:23<01:54, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2377/3000 [06:23<01:36, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2380/3000 [06:24<01:09, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [06:24<01:18, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2384/3000 [06:24<01:31, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2386/3000 [06:25<01:48, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2388/3000 [06:25<01:44, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2389/3000 [06:25<01:53, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2391/3000 [06:26<02:01, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2392/3000 [06:26<02:13, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2395/3000 [06:27<01:41, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2397/3000 [06:27<01:40, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2401/3000 [06:27<01:19, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2402/3000 [06:28<01:22, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2404/3000 [06:28<01:31, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2405/3000 [06:28<01:57, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2410/3000 [06:29<01:20, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2411/3000 [06:29<01:20, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2412/3000 [06:29<01:42, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2413/3000 [06:29<01:47, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2414/3000 [06:30<02:14, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2418/3000 [06:30<01:44, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2420/3000 [06:31<01:31, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [06:31<01:24, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2425/3000 [06:32<01:36, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2427/3000 [06:32<01:13, 7.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2431/3000 [06:32<01:13, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2434/3000 [06:32<01:00, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2436/3000 [06:33<01:33, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2438/3000 [06:33<01:43, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2440/3000 [06:34<01:27, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2441/3000 [06:34<01:36, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2442/3000 [06:34<01:56, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2444/3000 [06:35<02:03, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2447/3000 [06:35<01:36, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2451/3000 [06:35<01:09, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2454/3000 [06:36<01:29, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2458/3000 [06:36<01:02, 8.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2460/3000 [06:37<00:53, 10.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2463/3000 [06:38<01:51, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2464/3000 [06:38<01:45, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2466/3000 [06:38<01:30, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2470/3000 [06:39<01:12, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2471/3000 [06:39<01:12, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [06:39<01:37, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▎ | 2475/3000 [06:40<01:55, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2480/3000 [06:40<01:08, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2482/3000 [06:41<01:23, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2484/3000 [06:41<01:48, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2487/3000 [06:42<01:25, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2490/3000 [06:42<01:07, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2494/3000 [06:43<01:25, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2496/3000 [06:43<01:10, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2498/3000 [06:43<00:58, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2500/3000 [06:43<01:02, 7.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2502/3000 [06:44<01:04, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2505/3000 [06:44<00:50, 9.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2507/3000 [06:45<01:41, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2509/3000 [06:45<01:42, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2510/3000 [06:45<01:49, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2513/3000 [06:46<01:32, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2515/3000 [06:46<01:31, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2516/3000 [06:46<01:25, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [06:47<01:44, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2521/3000 [06:47<01:10, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2524/3000 [06:47<00:56, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2528/3000 [06:48<00:57, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2530/3000 [06:48<00:55, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2532/3000 [06:49<01:03, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2535/3000 [06:49<01:12, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2536/3000 [06:49<01:23, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2537/3000 [06:50<01:42, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2539/3000 [06:50<01:38, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2540/3000 [06:51<01:53, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2541/3000 [06:51<01:56, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2545/3000 [06:51<01:05, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2548/3000 [06:52<01:07, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2550/3000 [06:52<01:00, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2553/3000 [06:52<00:49, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2555/3000 [06:52<00:52, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2558/3000 [06:53<01:03, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2559/3000 [06:53<01:09, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2561/3000 [06:54<01:09, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2563/3000 [06:54<01:19, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2564/3000 [06:54<01:28, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2565/3000 [06:55<01:55, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2567/3000 [06:55<01:44, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2569/3000 [06:55<01:23, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2572/3000 [06:56<01:11, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2574/3000 [06:56<01:09, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2577/3000 [06:57<01:04, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2578/3000 [06:57<01:39, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2580/3000 [06:58<01:37, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2586/3000 [06:58<00:46, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2588/3000 [06:59<01:32, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2593/3000 [07:00<01:17, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2595/3000 [07:00<01:04, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2597/3000 [07:00<00:58, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2599/3000 [07:02<01:44, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2602/3000 [07:02<01:22, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2604/3000 [07:02<01:01, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2608/3000 [07:02<00:50, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2610/3000 [07:04<01:41, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2612/3000 [07:04<01:25, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2614/3000 [07:04<01:14, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2618/3000 [07:05<01:07, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2620/3000 [07:05<01:02, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2622/3000 [07:05<00:50, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2624/3000 [07:05<00:48, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2627/3000 [07:06<00:57, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2629/3000 [07:07<01:33, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2632/3000 [07:08<01:39, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2633/3000 [07:08<01:39, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2635/3000 [07:09<01:24, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2638/3000 [07:09<00:50, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2640/3000 [07:09<00:42, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2642/3000 [07:09<00:45, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2645/3000 [07:10<00:45, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2646/3000 [07:10<01:01, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2650/3000 [07:11<01:02, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2651/3000 [07:11<00:57, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2652/3000 [07:11<01:03, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2656/3000 [07:12<00:46, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2657/3000 [07:13<01:59, 2.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [07:13<01:52, 3.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2660/3000 [07:13<01:25, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [07:14<01:00, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2664/3000 [07:14<01:14, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2666/3000 [07:15<01:08, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2667/3000 [07:15<01:14, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2670/3000 [07:15<00:59, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2673/3000 [07:15<00:39, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2675/3000 [07:16<00:38, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2678/3000 [07:16<00:32, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2682/3000 [07:16<00:33, 9.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2683/3000 [07:17<00:43, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2684/3000 [07:17<00:53, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2685/3000 [07:17<00:59, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2687/3000 [07:18<00:57, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2688/3000 [07:19<01:57, 2.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2691/3000 [07:19<01:19, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2692/3000 [07:19<01:11, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2693/3000 [07:20<01:26, 3.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2694/3000 [07:20<01:20, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2698/3000 [07:21<00:57, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2699/3000 [07:21<00:52, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2703/3000 [07:21<00:49, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2705/3000 [07:22<00:43, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2706/3000 [07:22<00:52, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2710/3000 [07:22<00:42, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2712/3000 [07:23<00:51, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2713/3000 [07:23<00:49, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2716/3000 [07:24<00:41, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2717/3000 [07:24<00:50, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2720/3000 [07:25<00:53, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2721/3000 [07:26<01:54, 2.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2723/3000 [07:26<01:25, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2725/3000 [07:26<01:04, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2730/3000 [07:27<00:25, 10.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2732/3000 [07:27<00:23, 11.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2734/3000 [07:27<00:40, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [07:28<00:51, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2737/3000 [07:29<01:39, 2.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2743/3000 [07:30<00:46, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2747/3000 [07:31<00:48, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2748/3000 [07:31<00:44, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2750/3000 [07:32<01:07, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2751/3000 [07:32<01:03, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2754/3000 [07:33<00:54, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2757/3000 [07:33<00:38, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2758/3000 [07:33<00:54, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2761/3000 [07:34<00:43, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2762/3000 [07:34<00:57, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2763/3000 [07:34<01:07, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2766/3000 [07:35<00:47, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2769/3000 [07:35<00:35, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2771/3000 [07:35<00:29, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2773/3000 [07:36<00:38, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▎| 2775/3000 [07:36<00:38, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2776/3000 [07:37<00:50, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2779/3000 [07:37<00:42, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2780/3000 [07:37<00:44, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [07:38<00:55, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2782/3000 [07:38<01:04, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [07:38<00:47, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2787/3000 [07:39<00:57, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2788/3000 [07:40<01:03, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2790/3000 [07:40<00:56, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2792/3000 [07:41<00:49, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2794/3000 [07:41<00:42, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2798/3000 [07:41<00:23, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2799/3000 [07:42<00:43, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2802/3000 [07:42<00:30, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2804/3000 [07:42<00:22, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2809/3000 [07:43<00:18, 10.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2811/3000 [07:44<00:40, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2814/3000 [07:44<00:30, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2816/3000 [07:45<00:44, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2817/3000 [07:45<00:47, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2818/3000 [07:46<00:48, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2821/3000 [07:46<00:36, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2824/3000 [07:46<00:24, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2826/3000 [07:47<00:19, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2828/3000 [07:47<00:28, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2832/3000 [07:48<00:25, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [07:48<00:27, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2836/3000 [07:48<00:26, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2838/3000 [07:50<00:45, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2840/3000 [07:50<00:34, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2841/3000 [07:50<00:30, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2844/3000 [07:50<00:26, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2845/3000 [07:51<00:41, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2846/3000 [07:51<00:43, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2849/3000 [07:52<00:37, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2853/3000 [07:52<00:21, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2855/3000 [07:53<00:22, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2859/3000 [07:54<00:23, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2862/3000 [07:54<00:24, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2863/3000 [07:55<00:34, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2867/3000 [07:55<00:24, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2868/3000 [07:56<00:28, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2869/3000 [07:56<00:38, 3.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2872/3000 [07:57<00:29, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2873/3000 [07:57<00:31, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2874/3000 [07:57<00:30, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2876/3000 [07:57<00:23, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2878/3000 [07:58<00:27, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2881/3000 [07:59<00:22, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2884/3000 [07:59<00:18, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2887/3000 [08:00<00:28, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2888/3000 [08:00<00:31, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2891/3000 [08:01<00:21, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2893/3000 [08:01<00:18, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2895/3000 [08:01<00:16, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2897/3000 [08:02<00:27, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2902/3000 [08:03<00:16, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2904/3000 [08:03<00:13, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2905/3000 [08:04<00:21, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2908/3000 [08:05<00:23, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2912/3000 [08:05<00:16, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2913/3000 [08:05<00:15, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2914/3000 [08:06<00:21, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2918/3000 [08:06<00:14, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2919/3000 [08:07<00:18, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2920/3000 [08:07<00:21, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2922/3000 [08:08<00:16, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2923/3000 [08:08<00:16, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2927/3000 [08:08<00:11, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2929/3000 [08:09<00:11, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2930/3000 [08:09<00:12, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2932/3000 [08:09<00:12, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2934/3000 [08:10<00:10, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2936/3000 [08:10<00:16, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2937/3000 [08:10<00:13, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2940/3000 [08:11<00:10, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2942/3000 [08:11<00:12, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2943/3000 [08:12<00:13, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2947/3000 [08:12<00:08, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2948/3000 [08:13<00:11, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2950/3000 [08:13<00:10, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [08:13<00:08, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [08:14<00:11, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2954/3000 [08:15<00:15, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2958/3000 [08:15<00:09, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2959/3000 [08:15<00:08, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2960/3000 [08:16<00:09, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2962/3000 [08:16<00:08, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2965/3000 [08:17<00:06, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2967/3000 [08:17<00:04, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2968/3000 [08:17<00:07, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2970/3000 [08:18<00:07, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2972/3000 [08:18<00:05, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2975/3000 [08:18<00:03, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2977/3000 [08:19<00:04, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2979/3000 [08:19<00:03, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2980/3000 [08:20<00:04, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2981/3000 [08:20<00:05, 3.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2983/3000 [08:21<00:04, 3.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2988/3000 [08:21<00:01, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2990/3000 [08:22<00:02, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2994/3000 [08:22<00:00, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2995/3000 [08:23<00:01, 3.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2996/3000 [08:24<00:01, 2.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2997/3000 [08:25<00:01, 1.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2999/3000 [08:25<00:00, 2.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [08:26<00:00, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-10 17:01:38.485\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m13\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.4643333333333333, 'em': 0.4643333333333333, 'acc': 0.685}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")\n",
"\n",
"logger.info(\"Optimizing workflow...\")\n",
"textgrad_optimizer.optimize(benchmark, seed=8)\n",
"textgrad_optimizer.restore_best_graph()\n",
"\n",
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4b6f274d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.4643333333333333, 'em': 0.4643333333333333, 'acc': 0.685}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "b59c472f",
"metadata": {},
"outputs": [],
"source": [
"#generated_workflow\n",
"qa_workflow = {\n",
" \"goal\": \"Provide a concise answer to the question using relevant context. The answer must be straightforward and avoid unnecessary explanations.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"generate_answer\",\n",
" \"description\": \"Extract and formulate an answer from the given context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"question\", \"type\": \"str\", \"required\": True, \"description\": \"The question that needs to be answered.\"},\n",
" {\"name\": \"context\", \"type\": \"str\", \"required\": True, \"description\": \"The background information pertinent to the question.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The direct answer to the question.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Use the context to determine the best answer to the question. Provide your final answer in a clear format, without extra commentary or reasoning.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ]\n",
"}\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question\"])\n",
" return {\"question\": problem, 'context':''}"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "2c62cea3",
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"\"\n",
"os.environ[\"AZURE_OPENAI_KEY\"] = \"\"\n",
"os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
"llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
" azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
" azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
" api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"\n",
"executor_llm = LiteLLM(config=llm_config)\n",
"optimizer_llm = LiteLLM(config=llm_config)\n",
"llm = executor_llm"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "d965a023",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 17:55:29.910\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-10 17:55:30.004\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-10 17:55:30.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hitqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/antibiotic_pred/ec_test.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = MolQA()\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(qa_workflow)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "67291165",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"3000"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._test_data)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "76b45e2f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-10 17:55:37.092\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/3000 [00:00<45:41, 1.09it/s]Task exception was never retrieved\n",
"future: exception=RuntimeError('Event loop is closed')>\n",
"Traceback (most recent call last):\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/tasks.py\", line 277, in __step\n",
" result = coro.send(None)\n",
" ^^^^^^^^^^^^^^^\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/utils.py\", line 873, in _client_async_logging_helper\n",
" GLOBAL_LOGGING_WORKER.ensure_initialized_and_enqueue(\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 322, in ensure_initialized_and_enqueue\n",
" self.enqueue(async_coroutine)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/litellm/litellm_core_utils/logging_worker.py\", line 131, in enqueue\n",
" self._queue.put_nowait(task)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 147, in put_nowait\n",
" self._wakeup_next(self._getters)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/queues.py\", line 63, in _wakeup_next\n",
" waiter.set_result(None)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 263, in set_result\n",
" self.__schedule_callbacks()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/futures.py\", line 173, in __schedule_callbacks\n",
" self._loop.call_soon(callback, self, context=ctx)\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 762, in call_soon\n",
" self._check_closed()\n",
" File \"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/asyncio/base_events.py\", line 520, in _check_closed\n",
" raise RuntimeError('Event loop is closed')\n",
"RuntimeError: Event loop is closed\n",
"Evaluating workflow: 0%| | 3/3000 [00:01<14:52, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 6/3000 [00:01<09:43, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 11/3000 [00:01<04:00, 12.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 17/3000 [00:01<03:07, 15.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 22/3000 [00:02<03:39, 13.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 24/3000 [00:02<03:53, 12.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 29/3000 [00:02<04:13, 11.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 34/3000 [00:03<03:27, 14.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 37/3000 [00:03<03:29, 14.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 39/3000 [00:03<03:39, 13.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 44/3000 [00:03<03:15, 15.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 49/3000 [00:04<04:08, 11.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 51/3000 [00:04<04:05, 11.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 58/3000 [00:04<02:54, 16.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 60/3000 [00:05<03:05, 15.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 65/3000 [00:05<03:29, 13.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 67/3000 [00:05<03:47, 12.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 73/3000 [00:06<03:02, 16.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 78/3000 [00:06<02:30, 19.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 85/3000 [00:06<02:45, 17.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 88/3000 [00:06<02:51, 16.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 90/3000 [00:07<03:41, 13.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 95/3000 [00:07<03:18, 14.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 99/3000 [00:07<03:19, 14.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 102/3000 [00:07<02:54, 16.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 106/3000 [00:08<03:48, 12.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 110/3000 [00:08<03:17, 14.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 112/3000 [00:08<04:16, 11.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 117/3000 [00:09<03:26, 13.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 123/3000 [00:09<02:32, 18.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 126/3000 [00:09<02:31, 18.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 128/3000 [00:09<03:40, 13.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 131/3000 [00:10<03:11, 14.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:10<03:35, 13.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 137/3000 [00:10<03:45, 12.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 139/3000 [00:10<03:56, 12.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 143/3000 [00:10<02:56, 16.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 147/3000 [00:11<02:43, 17.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 151/3000 [00:11<03:04, 15.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:11<03:27, 13.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 157/3000 [00:12<03:51, 12.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:12<02:57, 15.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 165/3000 [00:12<03:45, 12.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 167/3000 [00:12<03:53, 12.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 175/3000 [00:13<02:32, 18.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 180/3000 [00:13<03:30, 13.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 185/3000 [00:13<03:02, 15.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 189/3000 [00:14<03:12, 14.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 193/3000 [00:14<02:36, 17.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 195/3000 [00:14<02:44, 17.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 200/3000 [00:14<02:46, 16.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 204/3000 [00:15<03:36, 12.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 206/3000 [00:15<03:40, 12.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 210/3000 [00:15<02:43, 17.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 215/3000 [00:15<03:02, 15.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 219/3000 [00:16<02:57, 15.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 225/3000 [00:16<02:24, 19.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 230/3000 [00:16<03:13, 14.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 232/3000 [00:16<03:02, 15.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 239/3000 [00:17<02:46, 16.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 241/3000 [00:17<03:11, 14.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 247/3000 [00:17<02:36, 17.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 252/3000 [00:18<02:46, 16.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 256/3000 [00:18<03:02, 15.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 259/3000 [00:18<03:25, 13.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 262/3000 [00:18<03:11, 14.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 266/3000 [00:19<02:55, 15.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 269/3000 [01:01<3:01:57, 4.00s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 271/3000 [01:01<2:12:35, 2.92s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 276/3000 [01:01<1:00:45, 1.34s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 282/3000 [01:02<26:43, 1.69it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 288/3000 [01:02<13:30, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 290/3000 [01:02<11:12, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 297/3000 [01:03<06:33, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 299/3000 [01:03<05:53, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 303/3000 [01:03<04:56, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 310/3000 [01:03<02:53, 15.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 313/3000 [01:04<03:10, 14.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [01:04<05:00, 8.93it/s]Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.234968669)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.275982979)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.561575786)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099100.980529856)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.39343292)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.18885595)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.316794107)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.57022979)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099100.788353391)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.282348946)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.64948812)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.176831366)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099100.619821641)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.866438283)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.199536575)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.440489142)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099100.625893747)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.347229356)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.205620188)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099100.81612747)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.988447128)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099101.916797626)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.67690388)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.040575851)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.836855305)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.906540012)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.589246621)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.697556359)])']\n",
"connector: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.00347262)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.890430599)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.956646017)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.220517268)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.248973464)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.700841246)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.562591492)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.082856749)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.206997042)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.022830461)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.197883053)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099102.295594962)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.416501827)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.014970711)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.197191776)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.562635084)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.563896359)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.080669774)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.422963269)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.365649807)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.358693048)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.236119182)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.396485911)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.681120488)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.744047325)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.389506504)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.021219971)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099103.576289318)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed connector\n",
"connections: ['deque([(, 2099104.836761827)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.146185302)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.356110063)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.258725747)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.530412202)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.451220523)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.030364112)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.713934777)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.070418808)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.619673776)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.540051573)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.770350735)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.685607177)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.87776549)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.982016733)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099104.912797469)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.65687381)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.532142316)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.469669783)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.371697456)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.33539682)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099105.879528274)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.352312152)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099107.157352195)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 2099106.645018078)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: