{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "2a8e89d4",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
" warnings.warn(\n"
]
}
],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "54f40417",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from evoagentx.benchmark import PertQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "54fa1aa5",
"metadata": {},
"outputs": [],
"source": [
"from evoagentx.benchmark import PertQA\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" problem = \"Question: {}\\n\\nAnswer:\".format(example[\"question\"])\n",
" return {\"problem\": problem}\n",
"\n",
"\n",
"hotpotqa_graph_data = {\n",
" \"goal\": \"Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"answer_generate\",\n",
" \"description\": \"Answer the question based on the context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"problem\", \"type\": \"str\", \"required\": True, \"description\": \"The problem to solve.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The answer to the problem.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "1ebace55",
"metadata": {},
"outputs": [],
"source": [
"api_key = \"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\"\n",
"# Function to encode the image\n",
"import os\n",
"os.environ[\"OPENAI_API_KEY\"] = api_key\n",
"OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
"\n",
"\n",
"llm_config = OpenAILLMConfig(model=\"gpt-4o-mini-2024-07-18\", openai_key=OPENAI_API_KEY, top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"llm = OpenAILLM(config=llm_config)\n",
"executor_llm = OpenAILLM(config=llm_config)\n",
"optimizer_llm = OpenAILLM(config=llm_config)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "20e078fa",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-03 18:44:59.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-03 18:44:59.007\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-03 18:44:59.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_test.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = PertQA(pertdata='norman')\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "78d5904e",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# len(benchmark._fulldata)\n",
"benchmark._train_data = benchmark._train_data[0:50]\n",
"benchmark._dev_data = benchmark._dev_data[0:50]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "d686ee20",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-03 18:45:25.717\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n",
"Evaluating workflow: 0%| | 1/3000 [00:02<2:18:44, 2.78s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 3/3000 [00:03<40:28, 1.23it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 4/3000 [00:03<27:49, 1.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 8/3000 [00:03<09:50, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 10/3000 [00:03<07:14, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 12/3000 [00:04<06:49, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 14/3000 [00:05<13:45, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 15/3000 [00:05<13:28, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 16/3000 [00:05<14:08, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 18/3000 [00:06<13:13, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 22/3000 [00:06<08:56, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 26/3000 [00:07<05:05, 9.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 28/3000 [00:08<09:47, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 30/3000 [00:08<10:07, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 34/3000 [00:08<06:59, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 36/3000 [00:09<07:45, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 38/3000 [00:09<07:53, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 42/3000 [00:09<06:15, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 44/3000 [00:10<08:58, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 46/3000 [00:10<08:16, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 50/3000 [00:11<07:00, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 52/3000 [00:11<07:02, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 53/3000 [00:12<09:32, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 55/3000 [00:12<09:06, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 60/3000 [00:13<07:18, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 61/3000 [00:13<10:19, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 62/3000 [00:13<10:30, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 64/3000 [00:13<08:35, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 65/3000 [00:14<09:43, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 68/3000 [00:14<08:25, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 70/3000 [00:15<08:33, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 74/3000 [00:15<06:25, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 77/3000 [00:15<05:56, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 80/3000 [00:15<05:08, 9.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 82/3000 [00:16<06:53, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 83/3000 [00:16<08:18, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 85/3000 [00:16<07:36, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 89/3000 [00:18<10:14, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 90/3000 [00:18<09:35, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 94/3000 [00:18<07:10, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 102/3000 [00:19<04:14, 11.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 104/3000 [00:19<05:43, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 106/3000 [00:20<08:00, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:20<08:43, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 108/3000 [00:20<09:38, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 110/3000 [00:21<08:58, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 112/3000 [00:21<09:24, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 114/3000 [00:21<07:59, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 118/3000 [00:22<05:57, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 122/3000 [00:22<04:39, 10.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 124/3000 [00:22<05:01, 9.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 126/3000 [00:23<06:46, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 127/3000 [00:23<08:41, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 130/3000 [00:24<08:08, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 131/3000 [00:24<09:18, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 134/3000 [00:24<06:51, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 137/3000 [00:25<06:09, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 140/3000 [00:25<06:59, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 143/3000 [00:26<07:20, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 145/3000 [00:26<05:52, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 146/3000 [00:26<10:10, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 148/3000 [00:27<10:52, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 150/3000 [00:27<09:56, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:27<06:30, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 156/3000 [00:28<06:03, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 160/3000 [00:28<06:01, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 165/3000 [00:29<05:21, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 169/3000 [00:30<06:40, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 172/3000 [00:30<09:01, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 174/3000 [00:31<07:51, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 178/3000 [00:31<05:19, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 183/3000 [00:31<04:29, 10.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 185/3000 [00:32<05:32, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 186/3000 [00:32<06:53, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 187/3000 [00:33<10:46, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 190/3000 [00:33<08:21, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 193/3000 [00:34<08:15, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 195/3000 [00:34<08:04, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 197/3000 [00:34<07:41, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 202/3000 [00:34<04:32, 10.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 206/3000 [00:35<04:44, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 208/3000 [00:35<06:39, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 211/3000 [00:36<08:42, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 212/3000 [00:36<08:06, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 213/3000 [00:37<09:07, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 214/3000 [00:37<10:21, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 217/3000 [00:37<08:31, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 222/3000 [00:38<04:30, 10.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 224/3000 [00:38<06:21, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 226/3000 [00:39<07:27, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 228/3000 [00:39<08:46, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 229/3000 [00:39<08:27, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 232/3000 [00:40<09:13, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 234/3000 [00:40<06:42, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 235/3000 [00:40<09:23, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 237/3000 [00:41<08:06, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 238/3000 [00:41<11:18, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 241/3000 [00:42<08:41, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 245/3000 [00:42<04:40, 9.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 247/3000 [00:42<04:43, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 249/3000 [00:42<04:34, 10.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 251/3000 [00:43<10:20, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 252/3000 [00:43<09:28, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 254/3000 [00:44<10:53, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 256/3000 [00:44<09:05, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 258/3000 [00:45<09:37, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 259/3000 [00:45<09:18, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 265/3000 [00:45<05:30, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 267/3000 [00:46<05:30, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 270/3000 [00:46<04:11, 10.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 272/3000 [00:46<07:40, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 273/3000 [00:47<08:55, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 274/3000 [00:47<09:01, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 276/3000 [00:47<08:23, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 278/3000 [00:48<07:10, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 279/3000 [00:48<07:58, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 284/3000 [00:48<05:07, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 287/3000 [00:49<04:01, 11.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 289/3000 [00:49<05:07, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 292/3000 [00:50<10:14, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 294/3000 [00:51<09:58, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 297/3000 [00:51<06:47, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 299/3000 [00:51<05:45, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 303/3000 [00:52<05:24, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 305/3000 [00:52<05:19, 8.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 307/3000 [00:52<07:20, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [00:53<06:35, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 311/3000 [00:53<08:16, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 313/3000 [00:53<07:55, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 314/3000 [00:54<09:12, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 316/3000 [00:54<08:35, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 318/3000 [00:54<08:20, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 319/3000 [00:55<09:50, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [00:55<09:54, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 325/3000 [00:55<05:14, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 328/3000 [00:56<04:46, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 330/3000 [00:56<04:32, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 332/3000 [00:56<07:23, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 333/3000 [00:57<10:21, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 337/3000 [00:57<07:35, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 338/3000 [00:58<07:43, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [00:58<10:32, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 341/3000 [00:58<08:50, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 346/3000 [00:59<05:31, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 348/3000 [00:59<07:15, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 353/3000 [01:00<07:10, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 354/3000 [01:00<06:47, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 357/3000 [01:01<07:42, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 359/3000 [01:01<06:34, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 360/3000 [01:01<06:23, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 362/3000 [01:02<06:57, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 363/3000 [01:02<10:29, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 368/3000 [01:03<05:52, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 370/3000 [01:03<05:08, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 373/3000 [01:03<05:58, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [01:03<06:46, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▎ | 375/3000 [01:04<07:30, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 379/3000 [01:04<06:04, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 381/3000 [01:05<08:52, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 383/3000 [01:05<08:23, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 385/3000 [01:06<07:54, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 389/3000 [01:06<04:14, 10.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 391/3000 [01:06<06:02, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 393/3000 [01:07<06:28, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 396/3000 [01:07<06:39, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 398/3000 [01:07<06:16, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 399/3000 [01:08<10:08, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 401/3000 [01:08<08:25, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 406/3000 [01:09<04:25, 9.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 408/3000 [01:09<05:29, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 410/3000 [01:10<08:28, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 412/3000 [01:10<10:06, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 415/3000 [01:11<08:04, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 419/3000 [01:11<06:46, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 420/3000 [01:12<09:53, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 422/3000 [01:12<08:56, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 423/3000 [01:12<09:01, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 427/3000 [01:13<07:28, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 429/3000 [01:13<08:23, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 431/3000 [01:14<06:12, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 432/3000 [01:14<06:43, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 434/3000 [01:14<07:01, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 436/3000 [01:14<07:31, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 438/3000 [01:15<05:37, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 440/3000 [01:15<05:30, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 441/3000 [01:15<06:40, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 444/3000 [01:16<06:14, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 445/3000 [01:16<10:20, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 446/3000 [01:17<12:08, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 448/3000 [01:17<09:52, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 451/3000 [01:17<06:45, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 454/3000 [01:18<06:11, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 456/3000 [01:18<05:15, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 458/3000 [01:19<08:53, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 460/3000 [01:19<08:29, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 463/3000 [01:19<05:42, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 465/3000 [01:20<06:36, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 470/3000 [01:20<05:40, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 473/3000 [01:21<05:09, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 474/3000 [01:21<05:30, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 476/3000 [01:21<06:16, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 478/3000 [01:22<10:20, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 482/3000 [01:22<05:31, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 485/3000 [01:22<04:57, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 487/3000 [01:23<07:32, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 489/3000 [01:23<05:52, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 493/3000 [01:24<04:54, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 495/3000 [01:24<04:34, 9.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 500/3000 [01:25<07:23, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 502/3000 [01:26<07:04, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 504/3000 [01:26<06:33, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 506/3000 [01:26<07:34, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 509/3000 [01:27<06:27, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 511/3000 [01:27<06:24, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 513/3000 [01:27<05:49, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 514/3000 [01:28<07:47, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 518/3000 [01:28<07:33, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 519/3000 [01:29<07:27, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 522/3000 [01:29<06:05, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 525/3000 [01:30<07:59, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 527/3000 [01:30<08:34, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 528/3000 [01:31<10:45, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 531/3000 [01:31<09:20, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [01:31<05:33, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 539/3000 [01:32<04:21, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 541/3000 [01:32<06:51, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 543/3000 [01:33<06:53, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 547/3000 [01:33<05:28, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [01:34<08:22, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 550/3000 [01:34<07:49, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 554/3000 [01:35<05:41, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 556/3000 [01:35<06:15, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 559/3000 [01:35<04:21, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [01:36<07:28, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 564/3000 [01:36<06:20, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 566/3000 [01:36<05:26, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 569/3000 [01:37<05:43, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 570/3000 [01:37<05:38, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 572/3000 [01:37<07:41, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 574/3000 [01:38<06:40, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 575/3000 [01:38<09:53, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 578/3000 [01:39<08:22, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [01:39<06:05, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 582/3000 [01:39<07:20, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 584/3000 [01:40<05:41, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [01:40<06:49, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 586/3000 [01:40<08:23, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 590/3000 [01:41<05:47, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 593/3000 [01:41<06:15, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 596/3000 [01:42<06:14, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 598/3000 [01:42<05:46, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 599/3000 [01:42<06:02, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 601/3000 [01:42<06:44, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 603/3000 [01:43<05:51, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 604/3000 [01:43<05:36, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 605/3000 [01:43<06:35, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 607/3000 [01:43<05:42, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 610/3000 [01:44<06:39, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 613/3000 [01:44<04:22, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 615/3000 [01:44<04:57, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 617/3000 [01:45<06:24, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 619/3000 [01:45<06:30, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 620/3000 [01:46<10:01, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 625/3000 [01:46<05:09, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 627/3000 [01:47<08:05, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 632/3000 [01:47<05:37, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 634/3000 [01:48<05:33, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 639/3000 [01:48<04:59, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 641/3000 [01:49<04:28, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 643/3000 [01:49<05:40, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 645/3000 [01:50<07:37, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [01:50<07:59, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 648/3000 [01:50<07:24, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 651/3000 [01:51<06:40, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 654/3000 [01:51<06:19, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [01:51<06:03, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 656/3000 [01:52<06:37, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 657/3000 [01:52<08:12, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 661/3000 [01:52<06:46, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 663/3000 [01:53<07:10, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 666/3000 [01:53<06:24, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 668/3000 [01:54<06:32, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 669/3000 [01:54<08:51, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 671/3000 [01:55<09:19, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 676/3000 [01:55<04:15, 9.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 678/3000 [01:55<04:33, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 682/3000 [01:56<04:13, 9.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 684/3000 [01:56<05:30, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 686/3000 [01:56<06:01, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 687/3000 [01:57<07:49, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 690/3000 [01:58<07:45, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 693/3000 [01:58<05:26, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 698/3000 [01:58<03:12, 11.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 700/3000 [01:58<03:12, 11.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 702/3000 [01:59<06:16, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 704/3000 [01:59<07:05, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [02:00<08:44, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 708/3000 [02:00<06:46, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 713/3000 [02:01<05:12, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 715/3000 [02:01<04:38, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 717/3000 [02:02<06:36, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 719/3000 [02:02<08:12, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 720/3000 [02:03<08:17, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 722/3000 [02:03<07:48, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 724/3000 [02:03<07:30, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 727/3000 [02:03<04:47, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 729/3000 [02:04<05:27, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [02:04<04:40, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 736/3000 [02:04<03:53, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 739/3000 [02:05<06:26, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 744/3000 [02:06<05:10, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 746/3000 [02:06<05:19, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 749/3000 [02:07<04:22, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 751/3000 [02:07<05:34, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 755/3000 [02:07<04:38, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 756/3000 [02:08<07:15, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 759/3000 [02:09<08:07, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 763/3000 [02:09<04:38, 8.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 765/3000 [02:09<05:17, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 767/3000 [02:10<05:25, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 769/3000 [02:10<05:49, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 770/3000 [02:10<05:49, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 776/3000 [02:11<06:24, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 779/3000 [02:11<04:24, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 781/3000 [02:12<05:37, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 783/3000 [02:13<08:47, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 786/3000 [02:13<06:57, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 789/3000 [02:13<05:14, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 791/3000 [02:14<06:02, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 793/3000 [02:14<04:59, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 795/3000 [02:14<04:37, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 797/3000 [02:15<05:47, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 798/3000 [02:15<05:40, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 800/3000 [02:15<06:01, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 803/3000 [02:15<04:29, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 804/3000 [02:16<05:10, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 805/3000 [02:16<06:06, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 809/3000 [02:17<07:06, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 814/3000 [02:17<04:43, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 816/3000 [02:17<04:07, 8.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 820/3000 [02:18<05:25, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 825/3000 [02:19<05:52, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 829/3000 [02:20<04:55, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 831/3000 [02:20<05:33, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [02:20<05:01, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 834/3000 [02:20<05:48, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 835/3000 [02:21<07:53, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 839/3000 [02:21<04:59, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 842/3000 [02:21<03:42, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 844/3000 [02:22<04:34, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 847/3000 [02:22<05:54, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 849/3000 [02:23<04:57, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 852/3000 [02:23<04:16, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 853/3000 [02:23<05:46, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 857/3000 [02:24<05:30, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 859/3000 [02:24<05:46, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 860/3000 [02:25<06:23, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 863/3000 [02:25<05:54, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 864/3000 [02:25<06:21, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 868/3000 [02:26<05:26, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 871/3000 [02:26<04:45, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 873/3000 [02:26<04:20, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 875/3000 [02:27<04:14, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 878/3000 [02:27<05:37, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 879/3000 [02:28<06:20, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [02:28<06:30, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 885/3000 [02:29<05:56, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [02:29<06:05, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 890/3000 [02:29<04:26, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [02:29<04:29, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 893/3000 [02:30<05:39, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 895/3000 [02:30<05:02, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 898/3000 [02:30<05:09, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 903/3000 [02:31<05:26, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 905/3000 [02:31<04:29, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 907/3000 [02:32<04:22, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 909/3000 [02:32<05:47, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 912/3000 [02:33<05:23, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 913/3000 [02:33<05:02, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 914/3000 [02:33<05:41, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 917/3000 [02:34<05:57, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 921/3000 [02:34<05:00, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 922/3000 [02:34<05:14, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 924/3000 [02:34<05:05, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 925/3000 [02:35<06:51, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 927/3000 [02:35<06:34, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 930/3000 [02:36<06:28, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 934/3000 [02:36<03:37, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 936/3000 [02:36<04:27, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 937/3000 [02:37<05:36, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 942/3000 [02:37<04:37, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 943/3000 [02:38<05:56, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 945/3000 [02:38<06:19, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 947/3000 [02:38<06:25, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 950/3000 [02:39<05:14, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 952/3000 [02:39<04:59, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 954/3000 [02:39<05:24, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 957/3000 [02:40<04:00, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 958/3000 [02:40<06:22, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 961/3000 [02:40<05:11, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 963/3000 [02:41<05:35, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 966/3000 [02:41<04:59, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 967/3000 [02:42<05:49, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 970/3000 [02:42<04:48, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 971/3000 [02:42<04:35, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 972/3000 [02:43<06:52, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▎ | 975/3000 [02:43<04:49, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 978/3000 [02:43<03:07, 10.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 980/3000 [02:43<04:23, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 982/3000 [02:44<07:59, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 985/3000 [02:45<05:37, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 987/3000 [02:45<05:25, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 990/3000 [02:46<06:30, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 994/3000 [02:46<06:41, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 995/3000 [02:47<07:02, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 997/3000 [02:47<06:58, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 999/3000 [02:47<06:02, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1001/3000 [02:48<04:32, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1002/3000 [02:48<05:28, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1005/3000 [02:48<04:33, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1006/3000 [02:48<04:32, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1010/3000 [02:49<03:49, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1011/3000 [02:49<05:22, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1013/3000 [02:50<05:36, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1014/3000 [02:50<06:34, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1016/3000 [02:50<05:53, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1018/3000 [02:51<06:03, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [02:51<06:43, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1022/3000 [02:52<06:49, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1024/3000 [02:52<05:19, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1026/3000 [02:52<06:25, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1031/3000 [02:53<05:43, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1035/3000 [02:53<03:47, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1037/3000 [02:54<05:03, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1041/3000 [02:54<04:43, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1042/3000 [02:55<04:49, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1044/3000 [02:55<05:59, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1047/3000 [02:56<06:30, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1053/3000 [02:56<03:10, 10.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1055/3000 [02:56<03:46, 8.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1057/3000 [02:57<04:45, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1060/3000 [02:57<03:43, 8.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1062/3000 [02:57<04:00, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1064/3000 [02:58<06:54, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1065/3000 [02:59<07:23, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1067/3000 [02:59<06:25, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1070/3000 [02:59<05:04, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1071/3000 [02:59<05:55, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1075/3000 [03:00<04:22, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1077/3000 [03:00<04:29, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1080/3000 [03:00<03:03, 10.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1082/3000 [03:01<03:46, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1084/3000 [03:01<05:34, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1085/3000 [03:01<06:18, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1086/3000 [03:02<06:33, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1089/3000 [03:02<05:45, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1090/3000 [03:03<06:15, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1092/3000 [03:03<06:01, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1096/3000 [03:03<03:59, 7.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1099/3000 [03:03<03:00, 10.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1101/3000 [03:04<03:47, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1103/3000 [03:04<04:24, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1105/3000 [03:05<05:50, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1107/3000 [03:05<06:55, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1111/3000 [03:06<05:10, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [03:06<05:05, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1116/3000 [03:06<03:57, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1120/3000 [03:07<03:19, 9.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1122/3000 [03:07<03:57, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1125/3000 [03:08<04:55, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1129/3000 [03:09<05:09, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1131/3000 [03:09<04:06, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1136/3000 [03:09<03:09, 9.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1138/3000 [03:10<05:32, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1141/3000 [03:10<05:22, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1142/3000 [03:11<05:25, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1144/3000 [03:11<06:12, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1147/3000 [03:11<05:18, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1149/3000 [03:12<04:53, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1151/3000 [03:12<04:02, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1154/3000 [03:12<04:29, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1157/3000 [03:13<03:18, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1161/3000 [03:13<04:18, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1163/3000 [03:14<05:11, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1166/3000 [03:14<04:00, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [03:14<03:38, 8.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [03:15<03:43, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1171/3000 [03:15<07:16, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1174/3000 [03:16<05:38, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1176/3000 [03:16<04:57, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1177/3000 [03:16<04:44, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1181/3000 [03:17<03:44, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1183/3000 [03:17<03:22, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1186/3000 [03:17<03:25, 8.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1189/3000 [03:17<03:13, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1190/3000 [03:18<03:56, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1192/3000 [03:18<06:31, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1194/3000 [03:19<06:44, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1196/3000 [03:19<06:07, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1199/3000 [03:20<04:52, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1205/3000 [03:20<03:15, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1208/3000 [03:20<02:25, 12.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1210/3000 [03:21<04:42, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1212/3000 [03:21<05:05, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1217/3000 [03:22<04:52, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1219/3000 [03:23<04:49, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1224/3000 [03:23<03:41, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1227/3000 [03:23<02:55, 10.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1229/3000 [03:23<02:55, 10.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1231/3000 [03:24<05:16, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1233/3000 [03:25<04:53, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1234/3000 [03:25<05:28, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1235/3000 [03:25<07:19, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1237/3000 [03:26<05:53, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1241/3000 [03:26<03:43, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1243/3000 [03:26<03:59, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1245/3000 [03:27<04:53, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1247/3000 [03:27<03:58, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1249/3000 [03:27<03:50, 7.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1250/3000 [03:27<04:00, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1252/3000 [03:27<04:23, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1254/3000 [03:28<05:44, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1256/3000 [03:29<07:18, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1263/3000 [03:29<03:35, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1265/3000 [03:30<04:15, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1267/3000 [03:30<05:17, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1269/3000 [03:31<06:05, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1272/3000 [03:31<05:51, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1277/3000 [03:32<03:42, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1279/3000 [03:32<03:50, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1281/3000 [03:32<04:29, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1284/3000 [03:33<04:36, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1286/3000 [03:33<04:42, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1287/3000 [03:34<04:31, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1293/3000 [03:34<02:44, 10.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1295/3000 [03:35<04:13, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1297/3000 [03:35<04:02, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1299/3000 [03:35<04:39, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1300/3000 [03:36<07:03, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1302/3000 [03:36<05:54, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1305/3000 [03:37<04:45, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1309/3000 [03:37<03:36, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1314/3000 [03:37<02:46, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1318/3000 [03:38<02:54, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1320/3000 [03:38<03:33, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1321/3000 [03:39<06:15, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1322/3000 [03:39<07:23, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1326/3000 [03:40<05:23, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1329/3000 [03:40<04:18, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1333/3000 [03:40<02:54, 9.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1335/3000 [03:41<03:00, 9.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1340/3000 [03:41<02:19, 11.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1343/3000 [03:42<04:15, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1346/3000 [03:43<05:16, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1350/3000 [03:43<03:26, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1352/3000 [03:43<03:29, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1356/3000 [03:44<03:06, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1360/3000 [03:44<03:24, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1361/3000 [03:44<03:45, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1362/3000 [03:45<04:47, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1364/3000 [03:45<05:36, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1366/3000 [03:45<04:39, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1367/3000 [03:46<05:23, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1369/3000 [03:46<05:15, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1371/3000 [03:46<03:45, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1376/3000 [03:47<02:50, 9.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1379/3000 [03:47<02:48, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1381/3000 [03:48<05:23, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1384/3000 [03:48<04:22, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1385/3000 [03:48<04:18, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1388/3000 [03:49<03:27, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [03:49<02:50, 9.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1392/3000 [03:49<03:28, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1394/3000 [03:49<03:21, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1396/3000 [03:50<03:19, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1400/3000 [03:51<04:44, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1402/3000 [03:51<04:46, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1403/3000 [03:51<04:55, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1405/3000 [03:52<04:51, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1407/3000 [03:52<03:58, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1408/3000 [03:52<04:27, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1413/3000 [03:53<03:16, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1418/3000 [03:53<02:19, 11.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1420/3000 [03:54<03:43, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1422/3000 [03:54<04:38, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1425/3000 [03:55<04:55, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1427/3000 [03:55<03:59, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1431/3000 [03:56<03:54, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1433/3000 [03:56<03:49, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1435/3000 [03:56<03:12, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1437/3000 [03:56<03:34, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1439/3000 [03:57<03:45, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1441/3000 [03:57<03:23, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1442/3000 [03:58<06:19, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1447/3000 [03:58<03:31, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1451/3000 [03:59<03:28, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1453/3000 [03:59<03:30, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1456/3000 [03:59<03:18, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [03:59<03:51, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1458/3000 [04:00<04:13, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1461/3000 [04:00<04:20, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1465/3000 [04:01<03:14, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1468/3000 [04:01<03:46, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1470/3000 [04:01<03:03, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1474/3000 [04:02<03:45, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1476/3000 [04:02<03:12, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1481/3000 [04:03<03:15, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1483/3000 [04:03<03:26, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1485/3000 [04:04<03:43, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1487/3000 [04:04<04:49, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1489/3000 [04:05<03:55, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1492/3000 [04:05<03:04, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1495/3000 [04:05<02:11, 11.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1497/3000 [04:05<02:40, 9.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1500/3000 [04:06<03:56, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1503/3000 [04:06<04:16, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1507/3000 [04:07<03:37, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1510/3000 [04:07<03:00, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1512/3000 [04:08<02:51, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1513/3000 [04:08<02:57, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1516/3000 [04:08<03:22, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1518/3000 [04:09<04:13, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1519/3000 [04:09<04:46, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1522/3000 [04:09<03:31, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1523/3000 [04:09<03:40, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1524/3000 [04:10<04:02, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1527/3000 [04:10<03:22, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1529/3000 [04:10<03:19, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1531/3000 [04:10<02:26, 10.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1535/3000 [04:11<02:23, 10.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1537/3000 [04:11<04:26, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1538/3000 [04:12<04:34, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1541/3000 [04:12<03:53, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1543/3000 [04:12<03:00, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1546/3000 [04:12<02:41, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1549/3000 [04:13<04:25, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1550/3000 [04:14<04:55, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [04:14<05:03, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1554/3000 [04:14<04:24, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1558/3000 [04:15<03:23, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1559/3000 [04:15<03:22, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1562/3000 [04:16<03:57, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1564/3000 [04:16<03:35, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1565/3000 [04:16<03:57, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1568/3000 [04:16<03:00, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1571/3000 [04:17<03:15, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1573/3000 [04:17<03:43, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▎ | 1575/3000 [04:18<03:28, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1577/3000 [04:18<03:29, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1579/3000 [04:18<03:47, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1581/3000 [04:18<03:00, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1582/3000 [04:19<04:41, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1586/3000 [04:19<03:11, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1590/3000 [04:20<03:07, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1592/3000 [04:20<04:09, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1594/3000 [04:21<04:11, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1597/3000 [04:21<03:33, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1601/3000 [04:22<03:29, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1603/3000 [04:22<03:17, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1605/3000 [04:22<03:32, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1608/3000 [04:23<02:53, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1611/3000 [04:23<03:17, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1612/3000 [04:23<03:07, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1615/3000 [04:24<04:28, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1618/3000 [04:24<03:14, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [04:25<03:44, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1621/3000 [04:25<03:54, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1623/3000 [04:25<03:22, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1626/3000 [04:26<03:16, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1628/3000 [04:26<03:48, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1630/3000 [04:26<03:20, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1632/3000 [04:27<02:59, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1634/3000 [04:27<03:37, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1637/3000 [04:27<02:55, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [04:27<02:59, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1641/3000 [04:28<03:07, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1645/3000 [04:28<02:25, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1647/3000 [04:29<03:39, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1648/3000 [04:29<04:17, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1650/3000 [04:30<04:21, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1652/3000 [04:30<04:14, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1653/3000 [04:30<04:02, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1655/3000 [04:31<04:29, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1658/3000 [04:31<02:44, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1660/3000 [04:31<02:36, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1661/3000 [04:31<03:54, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1662/3000 [04:32<04:09, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1666/3000 [04:32<02:38, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1668/3000 [04:32<02:39, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1669/3000 [04:32<03:43, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1672/3000 [04:33<03:47, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1674/3000 [04:34<04:59, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1676/3000 [04:34<03:44, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1681/3000 [04:34<02:04, 10.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1683/3000 [04:35<02:43, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1685/3000 [04:35<02:29, 8.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1687/3000 [04:35<02:35, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1689/3000 [04:36<03:54, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1693/3000 [04:37<04:53, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1697/3000 [04:37<03:01, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1699/3000 [04:37<02:28, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1701/3000 [04:37<02:32, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1703/3000 [04:38<02:44, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1705/3000 [04:38<03:55, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1707/3000 [04:39<04:53, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1709/3000 [04:40<04:52, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1712/3000 [04:40<05:01, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1713/3000 [04:41<05:04, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1716/3000 [04:41<03:32, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1720/3000 [04:41<02:37, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1721/3000 [04:41<03:24, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1722/3000 [04:42<04:03, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1726/3000 [04:42<03:14, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1727/3000 [04:43<03:34, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1729/3000 [04:43<03:52, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1731/3000 [04:43<03:06, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1732/3000 [04:43<03:39, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1733/3000 [04:44<04:08, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1735/3000 [04:44<04:25, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1738/3000 [04:44<03:08, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1739/3000 [04:45<03:42, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1742/3000 [04:45<02:54, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1743/3000 [04:45<04:03, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1745/3000 [04:46<04:32, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1749/3000 [04:47<03:31, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1750/3000 [04:47<03:33, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1751/3000 [04:47<04:12, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1753/3000 [04:48<04:55, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1755/3000 [04:48<03:36, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1761/3000 [04:48<01:56, 10.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [04:49<02:27, 8.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1767/3000 [04:50<03:50, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1769/3000 [04:50<03:33, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1770/3000 [04:50<05:28, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1771/3000 [04:51<05:11, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1773/3000 [04:51<03:55, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1775/3000 [04:51<03:34, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1776/3000 [04:51<04:09, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1778/3000 [04:52<03:55, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1782/3000 [04:52<02:39, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1783/3000 [04:52<02:54, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1786/3000 [04:53<03:36, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1789/3000 [04:53<02:38, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1790/3000 [04:54<04:33, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1791/3000 [04:54<05:43, 3.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1794/3000 [04:55<03:40, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1796/3000 [04:55<03:10, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1800/3000 [04:55<01:50, 10.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1804/3000 [04:56<03:32, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1806/3000 [04:57<03:40, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1807/3000 [04:57<04:07, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1810/3000 [04:58<03:13, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1811/3000 [04:58<03:04, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1813/3000 [04:58<04:00, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1814/3000 [04:58<03:32, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1815/3000 [04:59<03:57, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1819/3000 [04:59<02:46, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1820/3000 [04:59<03:10, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1821/3000 [05:00<03:55, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1822/3000 [05:00<04:39, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1824/3000 [05:00<03:40, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1828/3000 [05:00<02:32, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [05:01<02:34, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1831/3000 [05:01<02:56, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1834/3000 [05:02<03:09, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1839/3000 [05:02<02:23, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [05:02<02:23, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1848/3000 [05:03<02:32, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1850/3000 [05:04<03:22, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1852/3000 [05:04<03:19, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1855/3000 [05:05<02:54, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [05:05<02:49, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1857/3000 [05:05<03:53, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1859/3000 [05:06<03:24, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1862/3000 [05:06<03:04, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1865/3000 [05:06<02:40, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1867/3000 [05:07<02:27, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1871/3000 [05:07<03:01, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1873/3000 [05:08<02:53, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1877/3000 [05:08<01:53, 9.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1879/3000 [05:08<01:37, 11.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1881/3000 [05:08<02:07, 8.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1883/3000 [05:09<02:05, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1885/3000 [05:09<03:09, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1886/3000 [05:10<05:03, 3.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1888/3000 [05:10<04:32, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1892/3000 [05:11<03:12, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1896/3000 [05:11<02:09, 8.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [05:11<02:01, 9.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1901/3000 [05:12<02:52, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1904/3000 [05:13<02:54, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1905/3000 [05:13<03:08, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1907/3000 [05:13<03:45, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1911/3000 [05:14<02:29, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [05:14<01:42, 10.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1918/3000 [05:15<02:10, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1920/3000 [05:15<02:44, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1923/3000 [05:15<02:30, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1925/3000 [05:16<02:32, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1926/3000 [05:16<04:28, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1928/3000 [05:17<03:36, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1932/3000 [05:17<02:54, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1934/3000 [05:17<02:49, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1936/3000 [05:18<03:10, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1941/3000 [05:19<02:17, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1944/3000 [05:19<01:41, 10.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [05:19<02:32, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1949/3000 [05:20<02:50, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1950/3000 [05:20<03:20, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1953/3000 [05:20<02:39, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1954/3000 [05:21<03:36, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1957/3000 [05:21<02:57, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1960/3000 [05:22<02:32, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1966/3000 [05:22<01:50, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1968/3000 [05:23<03:11, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1970/3000 [05:23<02:56, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1974/3000 [05:24<02:08, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1976/3000 [05:24<02:16, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1978/3000 [05:24<02:36, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1980/3000 [05:25<02:48, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1983/3000 [05:25<02:19, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1984/3000 [05:25<02:59, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1988/3000 [05:26<02:31, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1989/3000 [05:26<02:29, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1992/3000 [05:27<02:06, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1994/3000 [05:27<01:44, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1998/3000 [05:27<01:38, 10.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2000/3000 [05:28<02:14, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2001/3000 [05:28<03:15, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2004/3000 [05:29<03:07, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2005/3000 [05:29<03:20, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2006/3000 [05:29<03:35, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2010/3000 [05:29<02:13, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2013/3000 [05:30<01:32, 10.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2015/3000 [05:30<02:10, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2018/3000 [05:30<02:09, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2020/3000 [05:31<02:28, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2022/3000 [05:31<02:58, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2024/3000 [05:32<03:26, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2025/3000 [05:32<03:18, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2030/3000 [05:33<01:58, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2032/3000 [05:33<02:35, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [05:33<01:53, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [05:34<02:32, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2042/3000 [05:35<03:31, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2044/3000 [05:35<03:04, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2047/3000 [05:36<02:51, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2049/3000 [05:36<02:59, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2051/3000 [05:36<02:30, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2052/3000 [05:36<02:18, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2056/3000 [05:37<02:38, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2059/3000 [05:38<02:26, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2062/3000 [05:38<02:34, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2063/3000 [05:39<02:53, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2065/3000 [05:39<03:00, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2067/3000 [05:39<02:58, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2070/3000 [05:40<02:02, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2073/3000 [05:40<02:15, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2075/3000 [05:40<01:50, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2077/3000 [05:41<02:06, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2079/3000 [05:41<02:22, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2080/3000 [05:41<02:11, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2081/3000 [05:41<03:03, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2084/3000 [05:42<03:13, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2091/3000 [05:43<01:17, 11.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2093/3000 [05:43<02:30, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2095/3000 [05:44<02:21, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2097/3000 [05:44<02:23, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2100/3000 [05:45<02:38, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2103/3000 [05:45<02:11, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2106/3000 [05:45<01:33, 9.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2108/3000 [05:45<01:52, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2110/3000 [05:46<02:01, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2112/3000 [05:46<02:38, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2115/3000 [05:47<01:58, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2116/3000 [05:47<02:18, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2119/3000 [05:47<02:07, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2121/3000 [05:48<02:20, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2122/3000 [05:48<03:10, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2125/3000 [05:49<02:58, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2127/3000 [05:49<02:35, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2132/3000 [05:50<01:52, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2134/3000 [05:50<01:45, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2136/3000 [05:50<02:03, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2137/3000 [05:50<02:39, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2139/3000 [05:51<02:46, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2141/3000 [05:52<03:26, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2146/3000 [05:52<01:36, 8.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2148/3000 [05:52<02:07, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2152/3000 [05:53<01:49, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2154/3000 [05:53<01:48, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2156/3000 [05:53<01:31, 9.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2160/3000 [05:54<02:30, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2162/3000 [05:55<02:27, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2164/3000 [05:55<02:31, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2166/3000 [05:55<02:38, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2171/3000 [05:56<02:00, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2172/3000 [05:56<02:01, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2177/3000 [05:57<01:34, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2179/3000 [05:58<02:44, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2182/3000 [05:58<02:04, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2184/3000 [05:58<01:55, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2186/3000 [05:59<02:50, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2187/3000 [05:59<02:52, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2192/3000 [06:00<01:48, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2194/3000 [06:00<01:36, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2196/3000 [06:00<01:33, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [06:00<01:48, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2200/3000 [06:00<01:38, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [06:01<01:56, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2202/3000 [06:01<03:23, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2204/3000 [06:02<02:43, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2206/3000 [06:02<03:18, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2211/3000 [06:03<01:31, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2213/3000 [06:03<01:17, 10.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2215/3000 [06:03<01:28, 8.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2219/3000 [06:04<01:32, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2221/3000 [06:04<02:22, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2223/3000 [06:05<02:19, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2225/3000 [06:05<03:00, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2227/3000 [06:06<02:11, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2229/3000 [06:06<02:40, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2231/3000 [06:06<02:03, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2232/3000 [06:06<01:58, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2238/3000 [06:07<01:04, 11.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2241/3000 [06:07<01:01, 12.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2243/3000 [06:08<01:37, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2246/3000 [06:09<02:26, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2247/3000 [06:09<02:28, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2250/3000 [06:09<02:38, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2253/3000 [06:10<02:15, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2256/3000 [06:10<01:41, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2258/3000 [06:11<01:42, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [06:11<01:36, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2261/3000 [06:11<02:28, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2264/3000 [06:12<02:14, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2267/3000 [06:12<02:14, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2269/3000 [06:13<01:56, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2272/3000 [06:13<01:43, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2274/3000 [06:13<01:27, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2276/3000 [06:14<01:57, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2277/3000 [06:14<01:53, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2280/3000 [06:14<02:17, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2285/3000 [06:15<01:44, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2290/3000 [06:16<01:35, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2293/3000 [06:16<01:19, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2295/3000 [06:16<01:41, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [06:17<02:09, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2298/3000 [06:17<02:02, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2302/3000 [06:17<01:26, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2304/3000 [06:18<02:01, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2308/3000 [06:19<01:44, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2311/3000 [06:19<01:25, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [06:19<01:48, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2316/3000 [06:20<02:01, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2320/3000 [06:21<01:39, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2323/3000 [06:21<02:00, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2328/3000 [06:22<01:45, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [06:24<02:55, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2340/3000 [06:24<01:17, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2344/3000 [06:25<01:25, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2346/3000 [06:25<01:41, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2350/3000 [06:26<02:06, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2354/3000 [06:27<01:46, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2356/3000 [06:27<01:41, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2358/3000 [06:27<01:35, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2360/3000 [06:28<01:39, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2361/3000 [06:28<02:00, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2363/3000 [06:28<01:46, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2364/3000 [06:29<02:10, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2366/3000 [06:29<01:53, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2369/3000 [06:30<01:48, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2370/3000 [06:30<01:47, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2372/3000 [06:30<01:38, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2373/3000 [06:30<02:00, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2375/3000 [06:31<01:55, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2379/3000 [06:31<01:40, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [06:32<01:12, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2384/3000 [06:32<01:24, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2386/3000 [06:32<01:38, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2390/3000 [06:33<01:38, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2394/3000 [06:33<01:13, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2397/3000 [06:34<01:31, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2400/3000 [06:34<01:16, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2401/3000 [06:35<01:21, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2403/3000 [06:35<01:23, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2405/3000 [06:36<02:39, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2407/3000 [06:36<02:02, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [06:36<01:50, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2410/3000 [06:37<01:54, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2414/3000 [06:37<01:12, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2417/3000 [06:37<00:52, 11.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2419/3000 [06:38<01:05, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2421/3000 [06:38<01:32, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2425/3000 [06:39<01:30, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2427/3000 [06:39<01:36, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2431/3000 [06:40<01:24, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2434/3000 [06:40<01:09, 8.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2436/3000 [06:40<01:19, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2439/3000 [06:41<01:14, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2440/3000 [06:41<01:38, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2442/3000 [06:42<01:39, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2444/3000 [06:42<01:52, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2446/3000 [06:42<01:42, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2449/3000 [06:43<01:41, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2450/3000 [06:43<01:37, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2451/3000 [06:43<02:02, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2455/3000 [06:44<01:27, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2457/3000 [06:44<01:07, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2460/3000 [06:45<01:23, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2461/3000 [06:45<01:41, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2462/3000 [06:45<02:03, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2465/3000 [06:46<01:50, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2466/3000 [06:46<01:52, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2470/3000 [06:46<01:11, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2472/3000 [06:47<01:09, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [06:47<01:24, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2477/3000 [06:48<01:35, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2482/3000 [06:48<00:52, 9.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2484/3000 [06:49<01:14, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2486/3000 [06:49<01:15, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [06:49<01:23, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2489/3000 [06:50<01:35, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2491/3000 [06:50<01:25, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2493/3000 [06:50<01:34, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2495/3000 [06:51<01:36, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2497/3000 [06:51<01:36, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2499/3000 [06:51<01:19, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2501/3000 [06:52<01:14, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2502/3000 [06:52<01:26, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2505/3000 [06:52<01:17, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2506/3000 [06:53<01:17, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2509/3000 [06:53<01:28, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2511/3000 [06:53<01:03, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2512/3000 [06:54<01:23, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2513/3000 [06:54<01:36, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2514/3000 [06:54<01:55, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2517/3000 [06:55<01:30, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2521/3000 [06:55<01:14, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [06:55<00:44, 10.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2527/3000 [06:56<00:50, 9.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2529/3000 [06:56<00:55, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2531/3000 [06:56<01:05, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2533/3000 [06:57<01:45, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2536/3000 [06:58<01:23, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2542/3000 [06:58<00:45, 10.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2546/3000 [06:59<00:53, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2548/3000 [06:59<00:56, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2551/3000 [06:59<00:43, 10.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2553/3000 [07:00<01:16, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2554/3000 [07:00<01:18, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2555/3000 [07:00<01:25, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2559/3000 [07:01<01:17, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2562/3000 [07:01<00:52, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2564/3000 [07:01<00:49, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2569/3000 [07:02<00:39, 10.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2571/3000 [07:03<01:20, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2573/3000 [07:03<01:13, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2575/3000 [07:03<01:03, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2576/3000 [07:03<01:08, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2577/3000 [07:04<01:31, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [07:04<01:36, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2584/3000 [07:05<01:01, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2586/3000 [07:05<00:52, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2588/3000 [07:05<00:54, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2591/3000 [07:06<00:48, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2593/3000 [07:06<01:31, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2595/3000 [07:07<01:39, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2597/3000 [07:07<01:27, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2601/3000 [07:08<00:56, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2604/3000 [07:08<00:46, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2606/3000 [07:08<00:55, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2607/3000 [07:09<01:18, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2612/3000 [07:09<00:47, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2614/3000 [07:10<01:04, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2616/3000 [07:10<01:12, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2618/3000 [07:11<01:14, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2621/3000 [07:11<01:07, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2622/3000 [07:11<01:01, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2624/3000 [07:12<00:56, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2625/3000 [07:12<01:00, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2626/3000 [07:12<01:22, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2627/3000 [07:12<01:24, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2630/3000 [07:13<00:59, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2633/3000 [07:13<00:57, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2636/3000 [07:14<01:23, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2641/3000 [07:14<00:40, 8.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2643/3000 [07:15<01:00, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2645/3000 [07:15<00:51, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2650/3000 [07:16<00:36, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2652/3000 [07:16<00:53, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2654/3000 [07:17<01:22, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2658/3000 [07:18<01:09, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2660/3000 [07:18<00:54, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2663/3000 [07:18<00:45, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2665/3000 [07:19<00:45, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2667/3000 [07:19<01:09, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2671/3000 [07:20<00:48, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2674/3000 [07:20<00:57, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2676/3000 [07:21<01:01, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2678/3000 [07:21<00:59, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2681/3000 [07:22<00:48, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2683/3000 [07:22<00:49, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2685/3000 [07:22<00:59, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2688/3000 [07:23<00:33, 9.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2690/3000 [07:23<00:32, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2692/3000 [07:23<00:50, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2693/3000 [07:24<00:59, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2694/3000 [07:24<01:02, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [07:24<01:03, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2699/3000 [07:25<00:52, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2702/3000 [07:25<00:39, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2705/3000 [07:25<00:29, 10.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2707/3000 [07:26<00:30, 9.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2709/3000 [07:26<00:30, 9.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2713/3000 [07:26<00:39, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2715/3000 [07:27<00:55, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2719/3000 [07:28<00:43, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2721/3000 [07:28<00:40, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2725/3000 [07:28<00:28, 9.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2727/3000 [07:28<00:30, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2730/3000 [07:29<00:43, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2732/3000 [07:30<00:58, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2733/3000 [07:30<01:16, 3.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [07:31<00:48, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2741/3000 [07:31<00:31, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2743/3000 [07:31<00:30, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [07:32<00:29, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2747/3000 [07:32<00:33, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2748/3000 [07:32<00:49, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2752/3000 [07:33<00:41, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2753/3000 [07:33<00:55, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2754/3000 [07:34<00:54, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2757/3000 [07:34<00:38, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2760/3000 [07:34<00:25, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2762/3000 [07:34<00:28, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2765/3000 [07:35<00:34, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2767/3000 [07:35<00:34, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2768/3000 [07:35<00:36, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2769/3000 [07:36<00:42, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2770/3000 [07:36<00:49, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2773/3000 [07:37<00:45, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2776/3000 [07:37<00:34, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2779/3000 [07:37<00:25, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [07:37<00:20, 10.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2783/3000 [07:38<00:39, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2786/3000 [07:38<00:30, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2789/3000 [07:39<00:41, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2790/3000 [07:39<00:37, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2791/3000 [07:40<00:41, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2794/3000 [07:40<00:29, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2797/3000 [07:40<00:28, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2799/3000 [07:40<00:22, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2802/3000 [07:41<00:29, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2804/3000 [07:42<00:36, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2807/3000 [07:43<00:48, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2810/3000 [07:43<00:32, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2813/3000 [07:43<00:29, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2817/3000 [07:44<00:23, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2818/3000 [07:44<00:23, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2820/3000 [07:44<00:28, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2822/3000 [07:45<00:36, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2823/3000 [07:45<00:40, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2826/3000 [07:46<00:35, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2828/3000 [07:46<00:30, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2832/3000 [07:46<00:23, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2833/3000 [07:47<00:24, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [07:47<00:26, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2835/3000 [07:47<00:29, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2837/3000 [07:47<00:28, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [07:48<00:22, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2840/3000 [07:48<00:25, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2843/3000 [07:48<00:21, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [07:49<00:31, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2847/3000 [07:49<00:23, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2848/3000 [07:49<00:31, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2849/3000 [07:50<00:38, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2853/3000 [07:50<00:22, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2855/3000 [07:50<00:23, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2858/3000 [07:51<00:17, 7.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2860/3000 [07:51<00:19, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2862/3000 [07:51<00:18, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2863/3000 [07:52<00:24, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2865/3000 [07:52<00:20, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2867/3000 [07:52<00:22, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2869/3000 [07:53<00:27, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2871/3000 [07:53<00:21, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2875/3000 [07:53<00:12, 10.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2877/3000 [07:54<00:18, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2879/3000 [07:54<00:20, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2881/3000 [07:55<00:18, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2884/3000 [07:55<00:15, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2885/3000 [07:55<00:17, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2887/3000 [07:56<00:20, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2889/3000 [07:56<00:16, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2891/3000 [07:56<00:22, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2893/3000 [07:57<00:17, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2895/3000 [07:57<00:17, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2897/3000 [07:57<00:13, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2899/3000 [07:58<00:15, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2901/3000 [07:58<00:15, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2903/3000 [07:58<00:18, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2905/3000 [07:59<00:16, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2907/3000 [08:00<00:25, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2912/3000 [08:00<00:14, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2915/3000 [08:00<00:10, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2917/3000 [08:00<00:10, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2921/3000 [08:01<00:08, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2923/3000 [08:01<00:10, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2924/3000 [08:02<00:22, 3.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2926/3000 [08:03<00:19, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2930/3000 [08:03<00:10, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2932/3000 [08:03<00:10, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2936/3000 [08:04<00:06, 9.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2938/3000 [08:04<00:05, 11.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2940/3000 [08:04<00:08, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2942/3000 [08:05<00:08, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2943/3000 [08:05<00:12, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2944/3000 [08:05<00:12, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2948/3000 [08:06<00:08, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2950/3000 [08:06<00:07, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2953/3000 [08:07<00:06, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2955/3000 [08:07<00:05, 8.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2957/3000 [08:07<00:05, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2961/3000 [08:08<00:05, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2966/3000 [08:09<00:05, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2968/3000 [08:09<00:05, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2971/3000 [08:10<00:04, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2973/3000 [08:10<00:03, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2975/3000 [08:10<00:04, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2976/3000 [08:11<00:04, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2979/3000 [08:11<00:04, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2984/3000 [08:12<00:02, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2985/3000 [08:12<00:02, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2989/3000 [08:13<00:01, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2990/3000 [08:13<00:01, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [08:13<00:01, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2993/3000 [08:13<00:01, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2997/3000 [08:14<00:00, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2999/3000 [08:15<00:00, 4.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [08:27<00:00, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-03 18:53:53.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (before optimization): {'f1': 0.0003333333333333333, 'em': 0.0003333333333333333, 'acc': 0.27066666666666667}\u001b[0m\n",
"\u001b[32m2026-01-03 18:53:53.022\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m6\u001b[0m - \u001b[1mOptimizing workflow...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
" 0%| | 0/20 [00:00, ?it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-03 18:53:53.027\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n",
"\u001b[32m2026-01-03 18:54:12.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m347\u001b[0m - \u001b[1mComputing gradients...\u001b[0m\n",
"\u001b[32m2026-01-03 18:55:39.882\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m349\u001b[0m - \u001b[1mUpdating agents...\u001b[0m\n",
"\u001b[32m2026-01-03 18:55:45.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m353\u001b[0m - \u001b[1mAgents updated\u001b[0m\n",
"\u001b[32m2026-01-03 18:55:45.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m271\u001b[0m - \u001b[1mEvaluating the workflow at step 1 ...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 0%| | 0/50 [00:00, ?it/s]\u001b[A\n",
"Evaluating workflow: 2%|▏ | 1/50 [00:02<01:40, 2.05s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 4%|▍ | 2/50 [00:02<00:53, 1.11s/it]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 8%|▊ | 4/50 [00:02<00:22, 2.07it/s]\u001b[A\n",
"Evaluating workflow: 10%|█ | 5/50 [00:02<00:16, 2.74it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 16%|█▌ | 8/50 [00:02<00:07, 5.48it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 26%|██▌ | 13/50 [00:03<00:03, 9.89it/s]\u001b[A\n",
"Evaluating workflow: 30%|███ | 15/50 [00:03<00:03, 10.55it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 36%|███▌ | 18/50 [00:03<00:02, 13.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 40%|████ | 20/50 [00:03<00:02, 10.22it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 44%|████▍ | 22/50 [00:05<00:06, 4.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 48%|████▊ | 24/50 [00:05<00:06, 4.10it/s]\u001b[A\n",
"Evaluating workflow: 52%|█████▏ | 26/50 [00:05<00:04, 5.25it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 62%|██████▏ | 31/50 [00:06<00:02, 7.52it/s]\u001b[A\n",
"Evaluating workflow: 68%|██████▊ | 34/50 [00:06<00:01, 8.94it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 72%|███████▏ | 36/50 [00:06<00:01, 9.10it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 76%|███████▌ | 38/50 [00:06<00:01, 9.79it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 80%|████████ | 40/50 [00:07<00:01, 5.47it/s]\u001b[A\n",
"Evaluating workflow: 84%|████████▍ | 42/50 [00:07<00:01, 6.32it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 86%|████████▌ | 43/50 [00:08<00:01, 4.81it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 88%|████████▊ | 44/50 [00:08<00:01, 3.88it/s]\u001b[A\n",
"Evaluating workflow: 94%|█████████▍| 47/50 [00:08<00:00, 6.17it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"Evaluating workflow: 100%|██████████| 50/50 [00:09<00:00, 5.44it/s]\u001b[A"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-03 18:55:54.496\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m275\u001b[0m - \u001b[1mStep 1 metrics: {'f1': 0.0, 'em': 0.0, 'acc': 0.26}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"\r",
" 5%|▌ | 1/20 [02:01<38:27, 121.47s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-03 18:55:54.497\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.textgrad_optimizer\u001b[0m:\u001b[36mstep\u001b[0m:\u001b[36m319\u001b[0m - \u001b[1mExecuting workflow...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
" 5%|▌ | 1/20 [02:17<43:37, 137.77s/it]\n"
]
},
{
"ename": "AssertionError",
"evalue": "Value must be a string, int, or image (bytes). Got: ",
"output_type": "error",
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mAssertionError\u001b[39m Traceback (most recent call last)",
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[8]\u001b[39m\u001b[32m, line 7\u001b[39m\n\u001b[32m 4\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mEvaluation metrics (before optimization): \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresults\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 6\u001b[39m logger.info(\u001b[33m\"\u001b[39m\u001b[33mOptimizing workflow...\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m7\u001b[39m \u001b[43mtextgrad_optimizer\u001b[49m\u001b[43m.\u001b[49m\u001b[43moptimize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbenchmark\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m8\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 8\u001b[39m textgrad_optimizer.restore_best_graph()\n\u001b[32m 10\u001b[39m logger.info(\u001b[33m\"\u001b[39m\u001b[33mEvaluating workflow on test set...\u001b[39m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/textgrad_optimizer.py:268\u001b[39m, in \u001b[36mTextGradOptimizer.optimize\u001b[39m\u001b[34m(self, dataset, use_answers, seed)\u001b[39m\n\u001b[32m 266\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m step \u001b[38;5;129;01min\u001b[39;00m tqdm(\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m.max_steps)):\n\u001b[32m 267\u001b[39m inputs, labels = \u001b[38;5;28mnext\u001b[39m(data_iterator)\n\u001b[32m--> \u001b[39m\u001b[32m268\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_answers\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 270\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.eval_every_n_steps \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m (step + \u001b[32m1\u001b[39m) % \u001b[38;5;28mself\u001b[39m.eval_every_n_steps == \u001b[32m0\u001b[39m:\n\u001b[32m 271\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mEvaluating the workflow at step \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mstep+\u001b[32m1\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m ...\u001b[39m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/textgrad_optimizer.py:326\u001b[39m, in \u001b[36mTextGradOptimizer.step\u001b[39m\u001b[34m(self, inputs, labels, dataset, use_answers)\u001b[39m\n\u001b[32m 323\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mLabels must be provided if `use_answers` is True.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 325\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m \u001b[38;5;28minput\u001b[39m, label \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(inputs, labels, strict=\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[32m--> \u001b[39m\u001b[32m326\u001b[39m output = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 327\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(label, \u001b[38;5;28mstr\u001b[39m):\n\u001b[32m 328\u001b[39m label = Variable(label, requires_grad=\u001b[38;5;28;01mFalse\u001b[39;00m, role_description=\u001b[33m\"\u001b[39m\u001b[33mcorrect answer for the query\u001b[39m\u001b[33m\"\u001b[39m)\n",
"\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/textgrad_optimizer.py:361\u001b[39m, in \u001b[36mTextGradOptimizer.forward\u001b[39m\u001b[34m(self, inputs)\u001b[39m\n\u001b[32m 359\u001b[39m end_node = \u001b[38;5;28mself\u001b[39m.graph.find_end_nodes()[\u001b[32m0\u001b[39m]\n\u001b[32m 360\u001b[39m input_variables = \u001b[38;5;28mself\u001b[39m._initial_inputs_to_variables(inputs)\n\u001b[32m--> \u001b[39m\u001b[32m361\u001b[39m output = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_compute_node\u001b[49m\u001b[43m(\u001b[49m\u001b[43mend_node\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_variables\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 362\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m output\n",
"\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/textgrad_optimizer.py:534\u001b[39m, in \u001b[36mTextGradOptimizer._compute_node\u001b[39m\u001b[34m(self, node, initial_inputs)\u001b[39m\n\u001b[32m 531\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m node_name \u001b[38;5;129;01min\u001b[39;00m input_node_names:\n\u001b[32m 532\u001b[39m input_variables[node_name] = \u001b[38;5;28mself\u001b[39m._compute_node(node_name, initial_inputs)\n\u001b[32m--> \u001b[39m\u001b[32m534\u001b[39m output_variable = \u001b[43mnode\u001b[49m\u001b[43m.\u001b[49m\u001b[43mtextgrad_agent\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_variables\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 535\u001b[39m \u001b[38;5;28mself\u001b[39m._visited_nodes.add(node.name)\n\u001b[32m 536\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m output_variable\n",
"\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/textgrad_optimizer.py:154\u001b[39m, in \u001b[36mTextGradAgent.__call__\u001b[39m\u001b[34m(self, inputs)\u001b[39m\n\u001b[32m 146\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Given textgrad.Variable inputs, generates a textgrad.Variable output.\"\"\"\u001b[39;00m\n\u001b[32m 148\u001b[39m forward_inputs: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Variable] = {\n\u001b[32m 149\u001b[39m \u001b[33m\"\u001b[39m\u001b[33minstruction\u001b[39m\u001b[33m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m.instruction, \n\u001b[32m 150\u001b[39m \u001b[33m\"\u001b[39m\u001b[33msystem_prompt\u001b[39m\u001b[33m\"\u001b[39m: \u001b[38;5;28mself\u001b[39m.system_prompt,\n\u001b[32m 151\u001b[39m **inputs\n\u001b[32m 152\u001b[39m }\n\u001b[32m--> \u001b[39m\u001b[32m154\u001b[39m output_variable = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mforward_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43moutput_description\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 155\u001b[39m output_variable.parsed_outputs = \u001b[38;5;28mself\u001b[39m._agent_call.last_outputs\n\u001b[32m 156\u001b[39m \u001b[38;5;28mself\u001b[39m.last_output = output_variable\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.conda/envs/evoagentx/lib/python3.11/site-packages/textgrad/autograd/function.py:17\u001b[39m, in \u001b[36mFunction.__call__\u001b[39m\u001b[34m(self, *args, **kwargs)\u001b[39m\n\u001b[32m 16\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, *args, **kwargs):\n\u001b[32m---> \u001b[39m\u001b[32m17\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.conda/envs/evoagentx/lib/python3.11/site-packages/textgrad/autograd/string_based_ops.py:73\u001b[39m, in \u001b[36mStringBasedFunction.forward\u001b[39m\u001b[34m(self, inputs, response_role_description)\u001b[39m\n\u001b[32m 70\u001b[39m response_string = \u001b[38;5;28mself\u001b[39m.fn(**inputs)\n\u001b[32m 72\u001b[39m \u001b[38;5;66;03m# Create the response variable\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m73\u001b[39m response = \u001b[43mVariable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 74\u001b[39m \u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresponse_string\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 75\u001b[39m \u001b[43m \u001b[49m\u001b[43mpredecessors\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 76\u001b[39m \u001b[43m \u001b[49m\u001b[43mrole_description\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresponse_role_description\u001b[49m\n\u001b[32m 77\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 79\u001b[39m logger.info(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mStringBasedFunction\u001b[39m\u001b[33m\"\u001b[39m, extra={\u001b[33m\"\u001b[39m\u001b[33mtext\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mIn: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minputs\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m, Out: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse_string\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m})\n\u001b[32m 81\u001b[39m \u001b[38;5;66;03m# Populate the gradient function, using a container to store the backward function and the context\u001b[39;00m\n",
"\u001b[36mFile \u001b[39m\u001b[32m~/.conda/envs/evoagentx/lib/python3.11/site-packages/textgrad/variable.py:43\u001b[39m, in \u001b[36mVariable.__init__\u001b[39m\u001b[34m(self, value, image_path, predecessors, requires_grad, role_description)\u001b[39m\n\u001b[32m 39\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;129;01mnot\u001b[39;00m requires_grad) \u001b[38;5;129;01mand\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(_predecessor_requires_grad) > \u001b[32m0\u001b[39m):\n\u001b[32m 40\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[33m\"\u001b[39m\u001b[33mIf the variable does not require grad, none of its predecessors should require grad.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 41\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mIn this case, following predecessors require grad: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m_predecessor_requires_grad\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m43\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(value) \u001b[38;5;129;01min\u001b[39;00m [\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mbytes\u001b[39m, \u001b[38;5;28mint\u001b[39m], \u001b[33m\"\u001b[39m\u001b[33mValue must be a string, int, or image (bytes). Got: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[33m\"\u001b[39m.format(\u001b[38;5;28mtype\u001b[39m(value))\n\u001b[32m 44\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, \u001b[38;5;28mint\u001b[39m):\n\u001b[32m 45\u001b[39m value = \u001b[38;5;28mstr\u001b[39m(value)\n",
"\u001b[31mAssertionError\u001b[39m: Value must be a string, int, or image (bytes). Got: "
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")\n",
"\n",
"logger.info(\"Optimizing workflow...\")\n",
"textgrad_optimizer.optimize(benchmark, seed=8)\n",
"textgrad_optimizer.restore_best_graph()\n",
"\n",
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4b6f274d",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/3000 [00:01<1:37:59, 1.96s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 4/3000 [00:02<21:52, 2.28it/s] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 7/3000 [00:02<12:04, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 9/3000 [00:02<08:35, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 13/3000 [00:03<05:34, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 17/3000 [00:03<04:05, 12.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 19/3000 [00:04<10:15, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 21/3000 [00:04<10:07, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 23/3000 [00:05<10:08, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 25/3000 [00:05<09:35, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 27/3000 [00:06<10:56, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 31/3000 [00:06<06:12, 7.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 33/3000 [00:06<05:50, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 37/3000 [00:07<05:12, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 39/3000 [00:07<08:45, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 40/3000 [00:08<09:29, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 41/3000 [00:08<13:02, 3.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 45/3000 [00:09<09:20, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 47/3000 [00:09<09:38, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 52/3000 [00:10<06:36, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 54/3000 [00:10<05:40, 8.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 56/3000 [00:10<05:21, 9.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 58/3000 [00:10<05:51, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 59/3000 [00:11<11:06, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 61/3000 [00:12<12:07, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 64/3000 [00:12<10:26, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 66/3000 [00:13<10:23, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 67/3000 [00:13<11:00, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 69/3000 [00:13<08:48, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 71/3000 [00:14<08:27, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 72/3000 [00:14<09:10, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 77/3000 [00:14<06:40, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 79/3000 [00:15<07:57, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 82/3000 [00:15<08:06, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 84/3000 [00:16<08:02, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 85/3000 [00:16<07:56, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 86/3000 [00:16<10:38, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 88/3000 [00:17<13:17, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 89/3000 [00:17<11:03, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 95/3000 [00:18<06:31, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 97/3000 [00:18<06:05, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 98/3000 [00:18<06:48, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 99/3000 [00:18<07:42, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 101/3000 [00:19<08:23, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 103/3000 [00:19<05:59, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 105/3000 [00:19<05:35, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 106/3000 [00:20<12:55, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:20<13:00, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 111/3000 [00:21<07:39, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 113/3000 [00:21<06:13, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 115/3000 [00:21<09:17, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 116/3000 [00:22<10:45, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 119/3000 [00:22<08:05, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 120/3000 [00:22<08:17, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 121/3000 [00:22<08:39, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 122/3000 [00:23<10:34, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 125/3000 [00:23<08:55, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 128/3000 [00:24<07:34, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 130/3000 [00:24<05:55, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 134/3000 [00:24<04:44, 10.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 136/3000 [00:25<09:13, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 139/3000 [00:25<07:48, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 140/3000 [00:26<09:21, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 143/3000 [00:26<08:24, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 145/3000 [00:27<08:29, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 147/3000 [00:27<09:09, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▍ | 149/3000 [00:27<09:07, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 153/3000 [00:28<07:24, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 156/3000 [00:28<07:18, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 157/3000 [00:29<08:03, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 160/3000 [00:29<09:11, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:30<09:36, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 165/3000 [00:30<07:41, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 166/3000 [00:31<09:08, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 168/3000 [00:31<09:27, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 170/3000 [00:31<06:41, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 172/3000 [00:32<08:02, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 174/3000 [00:32<08:32, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 175/3000 [00:32<11:52, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 179/3000 [00:33<08:53, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 182/3000 [00:33<06:11, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:33<05:02, 9.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 186/3000 [00:34<06:45, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 189/3000 [00:34<07:06, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 190/3000 [00:35<07:43, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 192/3000 [00:35<09:16, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 194/3000 [00:36<10:04, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 196/3000 [00:36<09:00, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 197/3000 [00:36<12:08, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 200/3000 [00:37<08:29, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 203/3000 [00:37<05:24, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 205/3000 [00:37<05:39, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 208/3000 [00:38<07:52, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 210/3000 [00:38<06:45, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 211/3000 [00:39<10:13, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 214/3000 [00:39<09:23, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 216/3000 [00:39<07:49, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 217/3000 [00:40<07:18, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 220/3000 [00:40<09:32, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 222/3000 [00:41<08:24, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 226/3000 [00:41<07:15, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 228/3000 [00:42<08:46, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 230/3000 [00:42<08:31, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 232/3000 [00:43<08:51, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 234/3000 [00:43<08:28, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 237/3000 [00:43<05:27, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 239/3000 [00:44<09:58, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 240/3000 [00:44<11:17, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 243/3000 [00:45<08:17, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 244/3000 [00:45<07:32, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 245/3000 [00:45<09:17, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 247/3000 [00:45<08:11, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 248/3000 [00:46<08:40, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 250/3000 [00:46<12:17, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 251/3000 [00:47<11:54, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 255/3000 [00:47<07:02, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 258/3000 [00:47<06:23, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 259/3000 [00:48<08:16, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 262/3000 [00:48<07:50, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 263/3000 [00:48<07:18, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 266/3000 [00:49<09:14, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 268/3000 [00:50<09:20, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 269/3000 [00:50<09:29, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 270/3000 [00:50<10:30, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 272/3000 [00:51<10:43, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 273/3000 [00:51<10:44, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 277/3000 [00:51<06:32, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 279/3000 [00:52<07:09, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 281/3000 [00:52<08:05, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 282/3000 [00:52<08:42, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 286/3000 [00:53<06:51, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 287/3000 [00:53<07:14, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 289/3000 [00:54<09:35, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 292/3000 [00:55<10:51, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 295/3000 [00:55<07:18, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 300/3000 [00:55<04:49, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 302/3000 [00:55<05:13, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 305/3000 [00:56<08:27, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [00:57<09:54, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 309/3000 [00:57<10:12, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 312/3000 [00:58<08:21, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [00:58<06:34, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 317/3000 [00:58<06:38, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 319/3000 [00:59<08:59, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [00:59<10:37, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 322/3000 [01:00<09:26, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 324/3000 [01:00<09:47, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 325/3000 [01:00<09:48, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 328/3000 [01:01<08:38, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 331/3000 [01:01<05:26, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 334/3000 [01:02<06:50, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 337/3000 [01:02<07:32, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 338/3000 [01:02<07:10, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [01:03<09:48, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 343/3000 [01:03<06:18, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 345/3000 [01:04<07:27, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 347/3000 [01:04<08:16, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 348/3000 [01:05<10:03, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 350/3000 [01:05<09:19, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 351/3000 [01:05<08:51, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 353/3000 [01:06<09:10, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 355/3000 [01:06<08:03, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 357/3000 [01:06<07:17, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 361/3000 [01:07<07:52, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 363/3000 [01:07<08:06, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 365/3000 [01:08<08:20, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 369/3000 [01:08<04:19, 10.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 371/3000 [01:08<04:02, 10.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 373/3000 [01:09<08:47, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 374/3000 [01:09<09:14, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 376/3000 [01:10<09:02, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 377/3000 [01:10<09:06, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 380/3000 [01:10<07:11, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 382/3000 [01:11<07:38, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 383/3000 [01:11<07:02, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 387/3000 [01:11<05:39, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 389/3000 [01:12<07:36, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 392/3000 [01:12<06:04, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 394/3000 [01:12<05:34, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 396/3000 [01:13<06:35, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 397/3000 [01:13<09:20, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 400/3000 [01:14<07:22, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:14<08:54, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 403/3000 [01:14<08:08, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 404/3000 [01:15<08:46, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 406/3000 [01:15<08:44, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 411/3000 [01:15<03:52, 11.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 413/3000 [01:16<08:39, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 417/3000 [01:17<05:55, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 419/3000 [01:17<05:28, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 422/3000 [01:18<11:12, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 423/3000 [01:18<10:52, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 424/3000 [01:19<10:39, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 426/3000 [01:19<09:05, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 432/3000 [01:19<04:31, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 434/3000 [01:19<03:53, 10.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 436/3000 [01:21<10:46, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 439/3000 [01:21<09:22, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 441/3000 [01:22<08:13, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 442/3000 [01:22<10:07, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 443/3000 [01:23<12:56, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 448/3000 [01:23<06:59, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 450/3000 [01:23<06:52, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 453/3000 [01:24<06:45, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 454/3000 [01:24<06:41, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 456/3000 [01:25<09:41, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 458/3000 [01:25<08:07, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 459/3000 [01:25<11:15, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 462/3000 [01:26<08:02, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 464/3000 [01:26<06:31, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 466/3000 [01:26<06:47, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 467/3000 [01:26<06:11, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 468/3000 [01:27<07:17, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 469/3000 [01:27<07:52, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 471/3000 [01:27<08:08, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 473/3000 [01:28<10:00, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 476/3000 [01:29<10:53, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 482/3000 [01:29<06:09, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 483/3000 [01:30<06:28, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 484/3000 [01:30<07:27, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [01:30<10:40, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 488/3000 [01:31<08:07, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 491/3000 [01:31<06:16, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 492/3000 [01:32<09:38, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 495/3000 [01:33<11:14, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 496/3000 [01:33<09:50, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 498/3000 [01:33<08:08, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 500/3000 [01:34<10:18, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 503/3000 [01:34<06:52, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 504/3000 [01:34<07:01, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 505/3000 [01:35<09:07, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 507/3000 [01:35<07:20, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 510/3000 [01:35<05:45, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 511/3000 [01:36<06:24, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 512/3000 [01:36<11:37, 3.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 514/3000 [01:37<10:14, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 518/3000 [01:37<05:38, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 520/3000 [01:37<06:05, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 521/3000 [01:38<08:31, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 524/3000 [01:38<07:50, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 526/3000 [01:39<07:30, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 528/3000 [01:39<08:06, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 530/3000 [01:40<08:51, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 531/3000 [01:40<09:17, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 532/3000 [01:40<13:24, 3.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 534/3000 [01:41<09:13, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 536/3000 [01:41<07:58, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 538/3000 [01:41<08:53, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 542/3000 [01:42<06:45, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 544/3000 [01:42<07:12, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 546/3000 [01:43<07:55, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 548/3000 [01:44<10:20, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 551/3000 [01:44<08:22, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 552/3000 [01:44<09:47, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 555/3000 [01:45<06:20, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 557/3000 [01:45<06:13, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 560/3000 [01:45<05:27, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 561/3000 [01:45<05:29, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 563/3000 [01:46<06:35, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 566/3000 [01:46<06:22, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 567/3000 [01:47<06:45, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 569/3000 [01:48<12:17, 3.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 573/3000 [01:48<06:08, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 576/3000 [01:49<06:39, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 578/3000 [01:49<07:35, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 580/3000 [01:50<08:55, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 583/3000 [01:50<06:26, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 584/3000 [01:50<07:22, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 587/3000 [01:51<07:38, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 588/3000 [01:51<08:55, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 591/3000 [01:52<07:35, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 594/3000 [01:52<06:03, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 596/3000 [01:52<05:37, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 599/3000 [01:53<06:20, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 600/3000 [01:53<07:29, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 601/3000 [01:53<08:30, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 602/3000 [01:54<09:28, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 604/3000 [01:54<09:49, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 608/3000 [01:55<06:02, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 610/3000 [01:55<05:16, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 611/3000 [01:55<05:55, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 614/3000 [01:55<05:40, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 615/3000 [01:56<06:25, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 616/3000 [01:56<07:35, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 619/3000 [01:57<09:34, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 620/3000 [01:57<11:25, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 625/3000 [01:58<05:22, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 628/3000 [01:58<03:47, 10.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 630/3000 [01:58<03:58, 9.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 634/3000 [01:59<06:44, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 636/3000 [02:00<09:54, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 638/3000 [02:00<08:32, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 639/3000 [02:01<09:57, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██▏ | 641/3000 [02:01<08:06, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 644/3000 [02:01<06:50, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 646/3000 [02:02<06:06, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 647/3000 [02:02<07:29, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 650/3000 [02:02<05:48, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 653/3000 [02:03<07:01, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 655/3000 [02:03<05:26, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 656/3000 [02:03<06:37, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 658/3000 [02:04<07:42, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 662/3000 [02:05<06:54, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 663/3000 [02:05<06:22, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 664/3000 [02:05<07:28, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 667/3000 [02:06<07:30, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 670/3000 [02:06<05:17, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 672/3000 [02:07<13:12, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 677/3000 [02:08<05:56, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 679/3000 [02:08<05:37, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 681/3000 [02:08<06:44, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [02:09<06:06, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 687/3000 [02:09<06:10, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 688/3000 [02:10<07:31, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 690/3000 [02:10<09:47, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 692/3000 [02:11<08:59, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 693/3000 [02:11<08:57, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 696/3000 [02:12<06:38, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 700/3000 [02:12<04:43, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 703/3000 [02:13<05:57, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 704/3000 [02:13<05:42, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [02:13<07:00, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 706/3000 [02:14<15:09, 2.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 710/3000 [02:14<07:59, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 714/3000 [02:15<07:35, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 715/3000 [02:16<08:12, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 717/3000 [02:16<08:48, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 718/3000 [02:16<09:05, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 721/3000 [02:17<07:19, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 724/3000 [02:17<06:07, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 725/3000 [02:17<06:55, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 726/3000 [02:18<07:12, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 729/3000 [02:18<07:17, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 731/3000 [02:19<06:19, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [02:19<06:02, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 734/3000 [02:19<05:44, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 736/3000 [02:19<06:15, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 740/3000 [02:20<06:31, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 741/3000 [02:20<06:03, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 742/3000 [02:21<08:16, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 744/3000 [02:21<08:15, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 746/3000 [02:21<06:48, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 747/3000 [02:22<07:09, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 751/3000 [02:22<04:55, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 754/3000 [02:23<05:13, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 757/3000 [02:24<08:15, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 761/3000 [02:24<05:42, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 763/3000 [02:24<05:44, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 765/3000 [02:25<05:00, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 769/3000 [02:25<05:22, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 770/3000 [02:26<06:24, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 771/3000 [02:26<07:04, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 773/3000 [02:26<07:47, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 774/3000 [02:27<10:12, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 776/3000 [02:27<09:42, 3.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 779/3000 [02:28<07:14, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 781/3000 [02:28<07:30, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 782/3000 [02:28<07:43, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 784/3000 [02:29<07:13, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 786/3000 [02:29<04:53, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 787/3000 [02:30<11:41, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 788/3000 [02:30<12:35, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 791/3000 [02:30<07:19, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 793/3000 [02:31<08:30, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 795/3000 [02:31<07:49, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 798/3000 [02:32<05:27, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 800/3000 [02:32<04:49, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 802/3000 [02:33<09:24, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 804/3000 [02:33<07:05, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 805/3000 [02:33<06:43, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 806/3000 [02:34<10:17, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 810/3000 [02:34<06:12, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 814/3000 [02:35<04:02, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 816/3000 [02:36<08:20, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 817/3000 [02:36<09:14, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 819/3000 [02:37<09:45, 3.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 821/3000 [02:37<10:22, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 824/3000 [02:37<05:56, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 826/3000 [02:38<05:58, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 827/3000 [02:38<10:53, 3.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 828/3000 [02:39<13:08, 2.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 833/3000 [02:40<07:38, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 836/3000 [02:40<07:20, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 838/3000 [02:41<05:36, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 840/3000 [02:41<05:09, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 841/3000 [02:41<06:41, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 842/3000 [02:41<06:56, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 845/3000 [02:42<05:55, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 846/3000 [02:42<06:02, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 848/3000 [02:43<08:20, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 851/3000 [02:43<05:14, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 853/3000 [02:43<04:15, 8.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 856/3000 [02:44<05:47, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 857/3000 [02:44<09:14, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 858/3000 [02:45<09:04, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 861/3000 [02:45<06:48, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 862/3000 [02:45<08:31, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 865/3000 [02:46<06:27, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 868/3000 [02:46<04:24, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 870/3000 [02:47<06:01, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 871/3000 [02:47<06:29, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 872/3000 [02:47<06:38, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 875/3000 [02:47<05:41, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 876/3000 [02:48<07:54, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 878/3000 [02:48<06:55, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 879/3000 [02:48<06:13, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 883/3000 [02:49<06:16, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 885/3000 [02:50<06:18, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [02:50<05:47, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 888/3000 [02:50<09:10, 3.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 889/3000 [02:51<08:44, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 891/3000 [02:51<07:00, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 892/3000 [02:51<07:22, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 894/3000 [02:52<07:26, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 897/3000 [02:52<06:04, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 899/3000 [02:52<04:44, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 900/3000 [02:52<05:18, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 902/3000 [02:53<06:57, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 904/3000 [02:54<08:21, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 906/3000 [02:54<07:07, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 908/3000 [02:54<06:05, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 911/3000 [02:55<05:47, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 914/3000 [02:56<07:49, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 917/3000 [02:56<05:00, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 919/3000 [02:56<04:37, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 921/3000 [02:57<08:15, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 924/3000 [02:57<06:46, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 926/3000 [02:58<05:43, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 928/3000 [02:58<06:08, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 930/3000 [02:58<04:38, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 931/3000 [02:59<08:16, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 935/3000 [02:59<05:42, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 939/3000 [02:59<03:53, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 941/3000 [03:00<04:31, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 942/3000 [03:00<04:47, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 943/3000 [03:00<05:50, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 945/3000 [03:01<05:57, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 946/3000 [03:01<05:49, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 947/3000 [03:01<07:24, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 950/3000 [03:02<06:25, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 952/3000 [03:02<06:19, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 953/3000 [03:02<06:53, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 955/3000 [03:03<07:33, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 956/3000 [03:03<07:22, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 959/3000 [03:03<05:35, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 962/3000 [03:03<03:33, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 964/3000 [03:04<03:23, 10.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 967/3000 [03:04<04:17, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 968/3000 [03:04<04:30, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 969/3000 [03:05<09:31, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 971/3000 [03:06<09:07, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 973/3000 [03:06<07:23, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 980/3000 [03:06<03:34, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 984/3000 [03:07<04:57, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 986/3000 [03:08<05:38, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 987/3000 [03:08<05:22, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 988/3000 [03:08<06:11, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 990/3000 [03:08<06:15, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 991/3000 [03:09<06:17, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 994/3000 [03:09<06:39, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 998/3000 [03:10<05:11, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 999/3000 [03:10<05:19, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1001/3000 [03:10<04:36, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1003/3000 [03:11<04:35, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 1004/3000 [03:11<05:08, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1005/3000 [03:11<06:37, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1007/3000 [03:11<05:26, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1009/3000 [03:12<06:57, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1012/3000 [03:12<05:18, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1013/3000 [03:13<06:40, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1016/3000 [03:13<06:03, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1019/3000 [03:13<04:04, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1021/3000 [03:14<04:49, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1022/3000 [03:14<04:51, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1023/3000 [03:15<08:15, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1026/3000 [03:15<06:29, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1028/3000 [03:15<05:51, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1031/3000 [03:16<04:53, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1032/3000 [03:16<06:15, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1034/3000 [03:16<06:22, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1039/3000 [03:17<03:55, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1041/3000 [03:17<04:16, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1042/3000 [03:18<07:10, 4.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1045/3000 [03:18<06:04, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1047/3000 [03:19<06:19, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1050/3000 [03:19<05:25, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1051/3000 [03:19<05:17, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1054/3000 [03:20<04:49, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1058/3000 [03:20<04:14, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [03:21<05:03, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1062/3000 [03:21<04:30, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1065/3000 [03:22<07:01, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1067/3000 [03:22<06:05, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1069/3000 [03:23<08:47, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1070/3000 [03:23<07:18, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1074/3000 [03:24<05:04, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1075/3000 [03:24<05:37, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1079/3000 [03:24<03:32, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1082/3000 [03:25<05:02, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1083/3000 [03:25<06:05, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1084/3000 [03:26<06:28, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1086/3000 [03:26<06:41, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1089/3000 [03:27<06:18, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1090/3000 [03:27<07:05, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1092/3000 [03:27<06:35, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1093/3000 [03:28<06:36, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1096/3000 [03:28<04:59, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1098/3000 [03:28<05:53, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [03:29<05:38, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1103/3000 [03:29<05:38, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1105/3000 [03:30<04:29, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1107/3000 [03:30<04:07, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1108/3000 [03:30<04:35, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1110/3000 [03:30<05:13, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1112/3000 [03:31<05:58, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1113/3000 [03:31<05:19, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1117/3000 [03:32<05:33, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1119/3000 [03:32<07:09, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1120/3000 [03:33<08:40, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1127/3000 [03:33<03:06, 10.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1130/3000 [03:33<02:26, 12.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1132/3000 [03:34<02:44, 11.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1134/3000 [03:35<07:43, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1138/3000 [03:36<06:33, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1140/3000 [03:36<05:46, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1142/3000 [03:36<05:31, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1144/3000 [03:36<04:36, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1146/3000 [03:37<04:35, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1149/3000 [03:37<03:49, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1151/3000 [03:38<05:50, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1152/3000 [03:38<08:19, 3.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1156/3000 [03:39<05:45, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1157/3000 [03:39<06:49, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1160/3000 [03:40<05:34, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1163/3000 [03:40<05:18, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1165/3000 [03:41<05:06, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1167/3000 [03:41<04:45, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [03:41<05:50, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1171/3000 [03:42<06:45, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1173/3000 [03:42<06:40, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1178/3000 [03:43<04:45, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1180/3000 [03:43<05:16, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1182/3000 [03:44<04:22, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1183/3000 [03:44<04:25, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1184/3000 [03:44<04:56, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1188/3000 [03:45<04:47, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1189/3000 [03:45<04:52, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1190/3000 [03:46<10:37, 2.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1193/3000 [03:46<07:06, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1195/3000 [03:47<07:31, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1196/3000 [03:47<06:27, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1201/3000 [03:47<03:45, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1203/3000 [03:48<04:41, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1205/3000 [03:48<04:05, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1207/3000 [03:48<03:53, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1209/3000 [03:49<05:44, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1210/3000 [03:49<07:18, 4.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1211/3000 [03:50<07:58, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1214/3000 [03:50<05:57, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1215/3000 [03:50<06:28, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1217/3000 [03:51<06:14, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1219/3000 [03:51<04:46, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1222/3000 [03:51<04:18, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1225/3000 [03:52<05:01, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1227/3000 [03:52<03:56, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1229/3000 [03:52<03:59, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1232/3000 [03:53<04:14, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1234/3000 [03:53<04:46, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1235/3000 [03:53<05:19, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1237/3000 [03:54<05:28, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1238/3000 [03:54<06:14, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1240/3000 [03:54<05:33, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1243/3000 [03:55<05:22, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1245/3000 [03:55<04:04, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1248/3000 [03:56<05:42, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1250/3000 [03:56<04:40, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1251/3000 [03:56<04:41, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1252/3000 [03:57<05:09, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1254/3000 [03:57<05:08, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1255/3000 [03:57<05:39, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1258/3000 [03:58<05:13, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1260/3000 [03:58<05:02, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1263/3000 [03:58<02:55, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1265/3000 [03:59<04:50, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1268/3000 [04:00<05:49, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1269/3000 [04:00<05:20, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1270/3000 [04:00<06:45, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1273/3000 [04:00<04:56, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▎ | 1275/3000 [04:01<03:38, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1277/3000 [04:01<05:03, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1278/3000 [04:01<06:05, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1281/3000 [04:02<05:06, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1282/3000 [04:02<05:14, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1283/3000 [04:02<05:24, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1286/3000 [04:03<05:24, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1288/3000 [04:03<05:13, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1290/3000 [04:04<06:06, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1292/3000 [04:04<06:48, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1297/3000 [04:05<03:08, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1299/3000 [04:05<03:12, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1301/3000 [04:05<03:50, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1303/3000 [04:05<03:48, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1305/3000 [04:06<04:17, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1306/3000 [04:06<05:09, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1308/3000 [04:07<05:16, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1309/3000 [04:07<05:35, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1311/3000 [04:08<07:45, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▎ | 1312/3000 [04:08<07:31, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1315/3000 [04:08<04:56, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1316/3000 [04:08<05:31, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1319/3000 [04:09<04:18, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1321/3000 [04:09<03:54, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1322/3000 [04:09<04:19, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1325/3000 [04:09<03:38, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1327/3000 [04:10<03:25, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1330/3000 [04:10<02:48, 9.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1332/3000 [04:11<05:14, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1333/3000 [04:11<06:36, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1336/3000 [04:12<05:42, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1337/3000 [04:12<06:31, 4.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1338/3000 [04:13<08:28, 3.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1341/3000 [04:13<06:50, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1345/3000 [04:14<04:02, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1350/3000 [04:14<02:49, 9.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1352/3000 [04:14<02:51, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1354/3000 [04:15<04:53, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1355/3000 [04:15<05:15, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1356/3000 [04:16<05:59, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1357/3000 [04:16<07:33, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1358/3000 [04:16<08:15, 3.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1362/3000 [04:17<05:00, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1368/3000 [04:18<03:51, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1370/3000 [04:18<03:19, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1372/3000 [04:18<03:02, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1374/3000 [04:18<03:29, 7.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1376/3000 [04:19<06:12, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1377/3000 [04:20<07:12, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1378/3000 [04:20<07:27, 3.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1382/3000 [04:21<05:54, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1385/3000 [04:21<04:03, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1386/3000 [04:21<04:04, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1389/3000 [04:22<04:35, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [04:22<04:28, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1393/3000 [04:22<03:17, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1395/3000 [04:23<04:29, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1397/3000 [04:23<04:23, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1398/3000 [04:23<06:33, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [04:24<06:05, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1402/3000 [04:25<07:51, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1406/3000 [04:25<05:25, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1409/3000 [04:26<04:10, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1411/3000 [04:26<05:26, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1415/3000 [04:27<03:25, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1417/3000 [04:27<02:44, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1419/3000 [04:27<04:06, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1421/3000 [04:28<04:04, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1422/3000 [04:28<03:49, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1423/3000 [04:28<05:45, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1425/3000 [04:29<05:18, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1426/3000 [04:29<06:00, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1427/3000 [04:29<06:00, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1430/3000 [04:30<04:26, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1432/3000 [04:30<04:32, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1434/3000 [04:31<06:06, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1436/3000 [04:31<05:08, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1438/3000 [04:31<04:57, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1441/3000 [04:32<04:11, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1442/3000 [04:32<05:35, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1445/3000 [04:32<04:05, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1447/3000 [04:33<03:58, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1449/3000 [04:33<04:05, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1453/3000 [04:33<03:48, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1455/3000 [04:34<04:38, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1456/3000 [04:34<05:34, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1457/3000 [04:35<06:26, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1459/3000 [04:35<05:37, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1460/3000 [04:35<06:14, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1462/3000 [04:36<05:39, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1464/3000 [04:36<03:50, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1465/3000 [04:36<04:32, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1468/3000 [04:36<03:40, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1471/3000 [04:37<02:58, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1473/3000 [04:37<04:05, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1476/3000 [04:38<05:21, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1477/3000 [04:38<05:15, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1480/3000 [04:39<05:22, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1483/3000 [04:39<03:47, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1486/3000 [04:40<03:18, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1488/3000 [04:40<03:45, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1490/3000 [04:40<03:45, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1491/3000 [04:41<04:01, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1492/3000 [04:41<04:42, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1493/3000 [04:41<06:02, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1494/3000 [04:41<05:49, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1498/3000 [04:42<03:45, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1500/3000 [04:43<04:47, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1501/3000 [04:43<05:00, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1503/3000 [04:43<04:01, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1505/3000 [04:43<03:41, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1506/3000 [04:43<04:16, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1507/3000 [04:44<05:19, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1510/3000 [04:44<04:03, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1513/3000 [04:45<03:41, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1515/3000 [04:45<03:04, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1516/3000 [04:45<03:36, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1519/3000 [04:45<03:27, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1521/3000 [04:46<03:09, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1523/3000 [04:46<03:52, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1525/3000 [04:47<06:36, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1528/3000 [04:47<04:41, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1530/3000 [04:48<04:22, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1533/3000 [04:48<04:40, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1536/3000 [04:48<03:10, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1540/3000 [04:49<02:25, 10.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1543/3000 [04:49<02:35, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1546/3000 [04:50<05:35, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1547/3000 [04:50<05:01, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1548/3000 [04:51<05:54, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [04:51<04:17, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1555/3000 [04:51<02:57, 8.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1557/3000 [04:52<03:54, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1562/3000 [04:52<02:47, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1564/3000 [04:53<02:31, 9.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1566/3000 [04:53<04:46, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1568/3000 [04:54<04:40, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1570/3000 [04:54<04:26, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1572/3000 [04:54<04:19, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▎ | 1575/3000 [04:55<03:30, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1576/3000 [04:55<04:01, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1581/3000 [04:56<04:02, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1583/3000 [04:57<05:12, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1586/3000 [04:57<04:47, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1588/3000 [04:57<04:07, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1593/3000 [04:58<03:02, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1595/3000 [04:58<02:50, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1596/3000 [04:58<03:55, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [04:59<03:24, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [04:59<04:31, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1602/3000 [05:00<04:43, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1605/3000 [05:00<05:09, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1608/3000 [05:01<04:40, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1609/3000 [05:01<04:25, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1612/3000 [05:01<03:14, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1614/3000 [05:02<03:26, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1616/3000 [05:02<03:26, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1618/3000 [05:02<03:33, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1622/3000 [05:03<03:30, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1623/3000 [05:03<03:38, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1625/3000 [05:04<04:12, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1626/3000 [05:04<05:17, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1627/3000 [05:04<05:09, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1629/3000 [05:04<04:25, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1631/3000 [05:05<04:41, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1636/3000 [05:05<02:12, 10.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [05:06<03:54, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1640/3000 [05:06<04:28, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1643/3000 [05:07<03:41, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1645/3000 [05:07<03:46, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1647/3000 [05:07<04:25, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1650/3000 [05:08<03:30, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1651/3000 [05:08<03:21, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1653/3000 [05:09<04:17, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1655/3000 [05:09<03:36, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1657/3000 [05:09<03:33, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1659/3000 [05:09<02:40, 8.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1661/3000 [05:10<03:36, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1663/3000 [05:10<03:22, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1664/3000 [05:11<05:16, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1666/3000 [05:11<06:24, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1669/3000 [05:12<03:43, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1672/3000 [05:12<02:49, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1677/3000 [05:12<01:47, 12.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [05:13<03:29, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1681/3000 [05:13<04:08, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1682/3000 [05:14<04:13, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1683/3000 [05:14<04:33, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1684/3000 [05:14<04:41, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1686/3000 [05:15<05:57, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1687/3000 [05:15<05:05, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1690/3000 [05:15<03:16, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1691/3000 [05:16<03:41, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1695/3000 [05:16<02:48, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1698/3000 [05:16<03:07, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1700/3000 [05:17<02:54, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1701/3000 [05:17<04:34, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1702/3000 [05:17<04:42, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [05:18<04:58, 4.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1705/3000 [05:18<04:59, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1708/3000 [05:19<04:08, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1713/3000 [05:19<02:33, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1715/3000 [05:19<02:25, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1717/3000 [05:20<02:43, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1719/3000 [05:20<03:37, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1720/3000 [05:20<04:00, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1722/3000 [05:21<05:27, 3.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1723/3000 [05:21<04:38, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▊ | 1725/3000 [05:22<04:25, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1728/3000 [05:22<03:27, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1729/3000 [05:22<03:55, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1731/3000 [05:23<03:30, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1733/3000 [05:23<04:01, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1735/3000 [05:23<04:09, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1739/3000 [05:24<02:54, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [05:25<04:06, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1743/3000 [05:25<03:42, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1745/3000 [05:25<04:26, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1748/3000 [05:26<03:36, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1751/3000 [05:26<02:26, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1753/3000 [05:26<02:58, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1757/3000 [05:27<03:00, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1759/3000 [05:27<02:46, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1763/3000 [05:28<03:09, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1765/3000 [05:28<02:44, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1770/3000 [05:29<02:48, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1772/3000 [05:30<03:55, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1773/3000 [05:30<03:47, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1774/3000 [05:30<04:52, 4.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1777/3000 [05:31<03:24, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1780/3000 [05:31<03:35, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1783/3000 [05:32<03:12, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1787/3000 [05:32<03:00, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1791/3000 [05:33<03:19, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1793/3000 [05:33<02:48, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1797/3000 [05:34<02:49, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1799/3000 [05:34<02:36, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1801/3000 [05:35<03:39, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1803/3000 [05:35<03:26, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1807/3000 [05:36<03:46, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1808/3000 [05:36<03:43, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1809/3000 [05:36<03:57, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1810/3000 [05:37<03:59, 4.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1813/3000 [05:37<03:06, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1815/3000 [05:37<02:24, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1816/3000 [05:38<03:34, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1819/3000 [05:38<02:58, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1820/3000 [05:38<03:35, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1822/3000 [05:39<03:18, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1823/3000 [05:39<03:47, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1825/3000 [05:39<03:07, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1827/3000 [05:39<03:20, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1829/3000 [05:40<04:26, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [05:40<04:20, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1833/3000 [05:41<03:28, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1836/3000 [05:41<03:03, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1838/3000 [05:41<02:41, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1839/3000 [05:42<02:37, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1840/3000 [05:42<03:00, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [05:42<04:03, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1845/3000 [05:43<04:33, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1847/3000 [05:44<04:22, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1849/3000 [05:44<03:29, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1851/3000 [05:44<03:41, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1854/3000 [05:45<02:37, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [05:45<02:05, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1858/3000 [05:45<02:11, 8.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1863/3000 [05:46<03:14, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1865/3000 [05:47<03:37, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1867/3000 [05:47<03:54, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1868/3000 [05:48<04:47, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1870/3000 [05:48<04:24, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1871/3000 [05:48<04:38, 4.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▎ | 1875/3000 [05:49<03:15, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1877/3000 [05:49<02:47, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1880/3000 [05:50<02:53, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1882/3000 [05:50<03:24, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1883/3000 [05:50<03:22, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1884/3000 [05:51<03:56, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1885/3000 [05:51<03:55, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1886/3000 [05:51<04:56, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1889/3000 [05:52<03:50, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1891/3000 [05:52<03:19, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1892/3000 [05:52<03:00, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1896/3000 [05:53<02:15, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1897/3000 [05:53<02:31, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1898/3000 [05:53<03:43, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1901/3000 [05:54<02:49, 6.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1902/3000 [05:54<02:35, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1903/3000 [05:54<03:32, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1904/3000 [05:55<05:33, 3.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1905/3000 [05:55<05:46, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1906/3000 [05:55<05:48, 3.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1908/3000 [05:56<04:35, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1909/3000 [05:56<04:18, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1910/3000 [05:56<04:58, 3.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1913/3000 [05:57<04:06, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1915/3000 [05:57<03:25, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1918/3000 [05:58<02:33, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1920/3000 [05:58<02:21, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1922/3000 [05:58<02:06, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1923/3000 [05:58<02:34, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1924/3000 [05:58<03:03, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1925/3000 [06:00<07:37, 2.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1928/3000 [06:00<04:51, 3.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1931/3000 [06:01<03:32, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1933/3000 [06:01<03:25, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1934/3000 [06:01<03:08, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1937/3000 [06:02<02:56, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1939/3000 [06:02<02:32, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1942/3000 [06:02<01:35, 11.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1944/3000 [06:03<04:42, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1946/3000 [06:04<04:57, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1949/3000 [06:04<03:55, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1952/3000 [06:05<02:59, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1954/3000 [06:05<02:56, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1958/3000 [06:05<01:34, 11.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1960/3000 [06:05<01:57, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1962/3000 [06:07<04:05, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1963/3000 [06:07<04:32, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1964/3000 [06:07<04:59, 3.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1969/3000 [06:08<02:53, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1973/3000 [06:08<02:11, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1975/3000 [06:08<02:16, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1978/3000 [06:09<02:28, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1979/3000 [06:09<03:01, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1980/3000 [06:10<03:49, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1982/3000 [06:10<04:44, 3.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1984/3000 [06:11<04:09, 4.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1986/3000 [06:11<02:46, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1991/3000 [06:12<02:07, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▋ | 1994/3000 [06:12<01:48, 9.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 1996/3000 [06:12<01:47, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1999/3000 [06:13<03:15, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2001/3000 [06:14<03:42, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2003/3000 [06:14<04:03, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2006/3000 [06:15<03:09, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2010/3000 [06:15<02:01, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2012/3000 [06:15<01:52, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2014/3000 [06:15<01:49, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2016/3000 [06:16<02:00, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2018/3000 [06:17<03:43, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2020/3000 [06:17<03:32, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2021/3000 [06:17<03:29, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2023/3000 [06:18<03:53, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2026/3000 [06:18<03:28, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2029/3000 [06:19<02:17, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2031/3000 [06:19<02:13, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2035/3000 [06:19<01:48, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2038/3000 [06:20<02:03, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [06:20<03:44, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2040/3000 [06:21<05:27, 2.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2041/3000 [06:22<05:26, 2.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2045/3000 [06:22<02:45, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2047/3000 [06:22<02:50, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2048/3000 [06:22<02:52, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2049/3000 [06:23<05:21, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2051/3000 [06:24<04:27, 3.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2055/3000 [06:24<02:24, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2057/3000 [06:24<02:07, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2058/3000 [06:24<02:25, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2060/3000 [06:25<03:19, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2063/3000 [06:25<02:28, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2065/3000 [06:26<02:32, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2067/3000 [06:26<02:19, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2068/3000 [06:26<02:33, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2069/3000 [06:27<03:21, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2070/3000 [06:27<04:33, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2071/3000 [06:27<04:18, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2073/3000 [06:28<04:00, 3.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2076/3000 [06:28<03:08, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2077/3000 [06:28<02:48, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2078/3000 [06:29<03:14, 4.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2079/3000 [06:29<03:37, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2083/3000 [06:29<01:57, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2086/3000 [06:30<02:06, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2090/3000 [06:30<01:58, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2091/3000 [06:31<03:04, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2095/3000 [06:31<02:03, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2096/3000 [06:31<02:31, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2101/3000 [06:32<02:08, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2103/3000 [06:33<03:16, 4.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2108/3000 [06:33<02:05, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2111/3000 [06:34<02:27, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2113/3000 [06:34<02:35, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2115/3000 [06:35<02:44, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2117/3000 [06:35<01:57, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2118/3000 [06:35<02:42, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2120/3000 [06:36<02:39, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2121/3000 [06:36<02:45, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2122/3000 [06:36<02:54, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2125/3000 [06:36<02:03, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2128/3000 [06:37<02:05, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2130/3000 [06:37<02:42, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2132/3000 [06:38<02:20, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2136/3000 [06:38<01:52, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2141/3000 [06:39<01:32, 9.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2143/3000 [06:39<01:59, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2145/3000 [06:40<02:31, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2146/3000 [06:40<03:27, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2150/3000 [06:41<02:36, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2152/3000 [06:41<02:21, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2153/3000 [06:41<02:55, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2154/3000 [06:42<03:01, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2158/3000 [06:42<02:05, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2159/3000 [06:42<02:30, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2162/3000 [06:43<02:08, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2164/3000 [06:43<02:13, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2166/3000 [06:43<02:21, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2167/3000 [06:44<02:59, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2168/3000 [06:44<03:26, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2172/3000 [06:45<02:38, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2173/3000 [06:45<02:35, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2174/3000 [06:46<03:37, 3.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2177/3000 [06:46<02:50, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2180/3000 [06:46<01:45, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2183/3000 [06:46<01:22, 9.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2185/3000 [06:47<01:47, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2187/3000 [06:48<03:44, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2188/3000 [06:49<04:03, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2189/3000 [06:49<03:59, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2191/3000 [06:49<03:14, 4.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2194/3000 [06:49<02:06, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2197/3000 [06:50<01:29, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2199/3000 [06:50<01:34, 8.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2201/3000 [06:50<01:59, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2203/3000 [06:51<02:00, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [06:51<01:56, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2206/3000 [06:52<03:04, 4.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2209/3000 [06:52<02:26, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2211/3000 [06:53<02:58, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2212/3000 [06:53<02:44, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2214/3000 [06:53<02:33, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2216/3000 [06:54<02:33, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2218/3000 [06:54<02:02, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2221/3000 [06:54<02:11, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2223/3000 [06:55<01:57, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [06:55<02:14, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2226/3000 [06:56<02:53, 4.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2229/3000 [06:56<03:04, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2230/3000 [06:57<03:14, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 2232/3000 [06:57<03:03, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2234/3000 [06:57<02:30, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2238/3000 [06:58<01:44, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2239/3000 [06:58<02:39, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2241/3000 [06:59<02:53, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2243/3000 [06:59<02:19, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2248/3000 [07:01<02:43, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2249/3000 [07:01<02:56, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2250/3000 [07:01<03:23, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2254/3000 [07:02<01:58, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2256/3000 [07:02<01:43, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2258/3000 [07:03<02:40, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2260/3000 [07:03<02:34, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2261/3000 [07:03<02:35, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2263/3000 [07:04<02:58, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2265/3000 [07:04<02:44, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2267/3000 [07:05<02:40, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2269/3000 [07:06<03:23, 3.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2274/3000 [07:06<01:46, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2277/3000 [07:06<01:28, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2281/3000 [07:08<02:20, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2283/3000 [07:08<02:04, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2286/3000 [07:08<01:52, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2287/3000 [07:08<01:56, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2290/3000 [07:09<02:20, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2292/3000 [07:10<02:07, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2294/3000 [07:10<01:47, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2295/3000 [07:10<02:08, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2297/3000 [07:11<02:18, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2299/3000 [07:11<02:11, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2301/3000 [07:11<02:16, 5.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2302/3000 [07:11<02:10, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2303/3000 [07:12<02:47, 4.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2308/3000 [07:13<02:25, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2309/3000 [07:13<02:11, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2310/3000 [07:13<02:16, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2312/3000 [07:14<01:53, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2317/3000 [07:14<01:13, 9.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2319/3000 [07:15<02:00, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2320/3000 [07:15<02:55, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2321/3000 [07:16<02:48, 4.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2322/3000 [07:16<02:52, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2325/3000 [07:16<02:33, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2327/3000 [07:17<02:05, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2329/3000 [07:17<01:32, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2332/3000 [07:17<01:25, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2334/3000 [07:18<01:41, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2337/3000 [07:18<01:26, 7.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2338/3000 [07:18<01:40, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2340/3000 [07:19<02:57, 3.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2341/3000 [07:19<02:32, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2343/3000 [07:20<02:00, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2344/3000 [07:20<02:12, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2346/3000 [07:20<01:49, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2350/3000 [07:20<01:11, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2352/3000 [07:21<02:23, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2354/3000 [07:22<02:27, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2355/3000 [07:22<02:14, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2356/3000 [07:22<02:24, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2358/3000 [07:23<02:32, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2360/3000 [07:23<02:35, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2362/3000 [07:23<01:49, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2365/3000 [07:24<01:38, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2368/3000 [07:24<01:35, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2370/3000 [07:25<02:00, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2372/3000 [07:25<01:24, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2374/3000 [07:26<02:03, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2375/3000 [07:26<03:17, 3.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2376/3000 [07:27<03:04, 3.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2379/3000 [07:27<02:24, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2382/3000 [07:27<01:47, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2385/3000 [07:28<01:38, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2387/3000 [07:28<01:24, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2388/3000 [07:29<02:00, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2391/3000 [07:29<01:43, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2392/3000 [07:29<01:38, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2394/3000 [07:30<02:36, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2396/3000 [07:30<02:23, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2400/3000 [07:31<01:55, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2405/3000 [07:31<01:10, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2407/3000 [07:32<01:43, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [07:32<01:51, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2410/3000 [07:33<02:03, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2412/3000 [07:33<02:13, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2415/3000 [07:34<01:35, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2417/3000 [07:34<01:24, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2419/3000 [07:34<01:50, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2421/3000 [07:35<01:33, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2422/3000 [07:35<01:40, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2424/3000 [07:35<02:08, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2425/3000 [07:36<02:32, 3.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2427/3000 [07:36<02:25, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2429/3000 [07:37<01:37, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2431/3000 [07:37<01:33, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2435/3000 [07:38<01:43, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2438/3000 [07:38<01:19, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2440/3000 [07:38<01:04, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2442/3000 [07:38<01:09, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████▏ | 2444/3000 [07:39<02:11, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2446/3000 [07:40<02:11, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2449/3000 [07:40<01:44, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2453/3000 [07:41<01:53, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2456/3000 [07:42<01:28, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2457/3000 [07:42<01:35, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2460/3000 [07:43<01:46, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2461/3000 [07:43<02:11, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2464/3000 [07:43<01:35, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2465/3000 [07:44<01:55, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2466/3000 [07:44<01:59, 4.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2469/3000 [07:45<01:45, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2472/3000 [07:45<01:28, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2473/3000 [07:45<01:23, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▎ | 2475/3000 [07:45<01:15, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2476/3000 [07:46<01:50, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2480/3000 [07:46<01:12, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2482/3000 [07:46<01:04, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2483/3000 [07:47<01:42, 5.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2485/3000 [07:47<01:33, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2487/3000 [07:48<02:01, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2488/3000 [07:48<01:57, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2490/3000 [07:49<01:43, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2493/3000 [07:49<01:30, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2497/3000 [07:50<01:03, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2500/3000 [07:50<01:18, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2503/3000 [07:51<01:31, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2505/3000 [07:51<01:32, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2507/3000 [07:52<01:36, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2510/3000 [07:53<02:07, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▎ | 2511/3000 [07:53<02:18, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2514/3000 [07:54<01:44, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2517/3000 [07:54<01:07, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2518/3000 [07:54<01:11, 6.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2520/3000 [07:54<01:23, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2523/3000 [07:55<01:18, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2525/3000 [07:56<01:40, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2528/3000 [07:56<01:20, 5.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2529/3000 [07:56<01:24, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2531/3000 [07:57<02:01, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2533/3000 [07:57<01:39, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2535/3000 [07:58<01:35, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2537/3000 [07:58<01:25, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2541/3000 [07:58<01:09, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2544/3000 [07:59<01:05, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2547/3000 [07:59<01:13, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2548/3000 [08:00<01:19, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2550/3000 [08:00<01:32, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2552/3000 [08:01<01:54, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2553/3000 [08:01<02:08, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2555/3000 [08:02<01:45, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2558/3000 [08:02<01:14, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2561/3000 [08:02<01:01, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2563/3000 [08:03<01:01, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2566/3000 [08:03<01:02, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2567/3000 [08:04<01:38, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2569/3000 [08:04<01:51, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2574/3000 [08:05<01:24, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2576/3000 [08:05<01:15, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2577/3000 [08:06<01:19, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [08:06<01:26, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2581/3000 [08:06<01:17, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2584/3000 [08:07<01:12, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2586/3000 [08:07<01:04, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2588/3000 [08:07<01:02, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2590/3000 [08:09<02:21, 2.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2591/3000 [08:09<02:16, 2.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2593/3000 [08:09<01:42, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2595/3000 [08:10<01:12, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2597/3000 [08:10<01:24, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2599/3000 [08:10<01:14, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2601/3000 [08:11<01:14, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2602/3000 [08:11<01:34, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2605/3000 [08:12<01:10, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2606/3000 [08:12<01:23, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2608/3000 [08:12<01:08, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2611/3000 [08:13<01:02, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2613/3000 [08:13<01:12, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2615/3000 [08:13<01:07, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2618/3000 [08:14<00:55, 6.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2622/3000 [08:14<00:43, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2623/3000 [08:15<01:12, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2624/3000 [08:15<01:41, 3.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2625/3000 [08:15<01:35, 3.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2629/3000 [08:17<01:32, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2632/3000 [08:17<01:06, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2634/3000 [08:17<01:03, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2635/3000 [08:17<01:03, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2638/3000 [08:18<00:52, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2640/3000 [08:18<00:47, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2642/3000 [08:18<00:56, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2644/3000 [08:19<01:39, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2646/3000 [08:19<01:07, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2647/3000 [08:20<01:08, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2652/3000 [08:21<01:02, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2654/3000 [08:21<00:53, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2656/3000 [08:21<00:54, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▊ | 2659/3000 [08:22<00:54, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2660/3000 [08:22<00:52, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2661/3000 [08:22<00:59, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [08:23<01:16, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2663/3000 [08:23<02:07, 2.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2666/3000 [08:24<01:38, 3.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2669/3000 [08:24<01:00, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2672/3000 [08:25<00:57, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2675/3000 [08:25<00:42, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2677/3000 [08:26<01:01, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2678/3000 [08:26<00:59, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2679/3000 [08:27<01:25, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2680/3000 [08:27<01:36, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2682/3000 [08:28<01:53, 2.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2685/3000 [08:28<00:57, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2687/3000 [08:29<01:01, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2688/3000 [08:29<01:05, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2691/3000 [08:29<00:47, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2693/3000 [08:29<00:50, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2694/3000 [08:30<01:04, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2696/3000 [08:31<01:24, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2698/3000 [08:31<01:20, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2699/3000 [08:31<01:15, 3.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2701/3000 [08:32<00:56, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2705/3000 [08:32<00:41, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2709/3000 [08:33<00:34, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2711/3000 [08:33<00:43, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2714/3000 [08:34<00:57, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2716/3000 [08:34<00:52, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2718/3000 [08:35<01:15, 3.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2721/3000 [08:35<00:42, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2724/3000 [08:36<00:56, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2727/3000 [08:36<00:40, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2728/3000 [08:37<00:45, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2730/3000 [08:37<00:45, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2731/3000 [08:38<01:20, 3.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2734/3000 [08:38<00:53, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [08:38<00:42, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2740/3000 [08:39<00:31, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2741/3000 [08:39<00:59, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2742/3000 [08:39<00:58, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 2744/3000 [08:40<00:56, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [08:40<01:15, 3.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2746/3000 [08:41<01:34, 2.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2755/3000 [08:42<00:28, 8.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2757/3000 [08:42<00:36, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2762/3000 [08:43<00:41, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2764/3000 [08:44<01:01, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2767/3000 [08:45<00:53, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2768/3000 [08:45<00:54, 4.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2772/3000 [08:45<00:35, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2773/3000 [08:46<00:46, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2774/3000 [08:46<00:48, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2777/3000 [08:47<00:39, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2779/3000 [08:47<00:30, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2780/3000 [08:47<00:36, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [08:47<00:51, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2784/3000 [08:48<00:35, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2787/3000 [08:48<00:31, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2789/3000 [08:49<00:40, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2791/3000 [08:49<00:30, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2792/3000 [08:49<00:29, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2793/3000 [08:49<00:37, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2796/3000 [08:50<00:34, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2797/3000 [08:50<00:37, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2801/3000 [08:51<00:36, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2802/3000 [08:51<00:54, 3.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2804/3000 [08:52<00:49, 3.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2807/3000 [08:52<00:41, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2810/3000 [08:52<00:26, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2813/3000 [08:53<00:21, 8.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2815/3000 [08:53<00:22, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2816/3000 [08:53<00:27, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2817/3000 [08:54<00:35, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2820/3000 [08:54<00:35, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [08:55<00:45, 3.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2825/3000 [08:55<00:33, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2826/3000 [08:56<00:33, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2828/3000 [08:56<00:31, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2830/3000 [08:56<00:24, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2831/3000 [08:56<00:29, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2832/3000 [08:57<00:32, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2837/3000 [08:57<00:21, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2838/3000 [08:57<00:21, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [08:58<00:25, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2841/3000 [08:58<00:27, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2842/3000 [08:58<00:38, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [08:59<00:30, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2845/3000 [08:59<00:30, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2846/3000 [08:59<00:31, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2848/3000 [08:59<00:27, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2849/3000 [09:00<00:31, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2850/3000 [09:00<00:31, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2852/3000 [09:00<00:28, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2856/3000 [09:01<00:25, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2860/3000 [09:01<00:14, 9.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2862/3000 [09:01<00:13, 10.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2864/3000 [09:02<00:26, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2868/3000 [09:03<00:24, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2870/3000 [09:04<00:27, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2871/3000 [09:04<00:26, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2877/3000 [09:04<00:13, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2879/3000 [09:04<00:15, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2882/3000 [09:05<00:20, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2884/3000 [09:06<00:20, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2885/3000 [09:06<00:25, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2886/3000 [09:06<00:29, 3.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2889/3000 [09:07<00:20, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2894/3000 [09:07<00:14, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2896/3000 [09:07<00:11, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2898/3000 [09:08<00:14, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2901/3000 [09:09<00:17, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2903/3000 [09:09<00:17, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2907/3000 [09:09<00:09, 9.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2909/3000 [09:10<00:19, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2912/3000 [09:10<00:14, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2916/3000 [09:11<00:14, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2918/3000 [09:12<00:13, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2920/3000 [09:12<00:11, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2923/3000 [09:12<00:10, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2924/3000 [09:13<00:14, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2927/3000 [09:13<00:12, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2931/3000 [09:13<00:08, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2933/3000 [09:14<00:11, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2934/3000 [09:14<00:10, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2935/3000 [09:14<00:12, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2937/3000 [09:15<00:10, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2939/3000 [09:15<00:11, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2942/3000 [09:16<00:10, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2943/3000 [09:16<00:09, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2945/3000 [09:16<00:09, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2947/3000 [09:17<00:10, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2951/3000 [09:17<00:06, 7.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2952/3000 [09:17<00:08, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2954/3000 [09:18<00:07, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2956/3000 [09:18<00:06, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2958/3000 [09:18<00:07, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2961/3000 [09:19<00:05, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2963/3000 [09:19<00:05, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2964/3000 [09:19<00:06, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2965/3000 [09:20<00:07, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2967/3000 [09:20<00:07, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2968/3000 [09:20<00:07, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2971/3000 [09:21<00:05, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2973/3000 [09:21<00:03, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2975/3000 [09:21<00:02, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2977/3000 [09:22<00:05, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2979/3000 [09:23<00:05, 3.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2981/3000 [09:23<00:04, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2984/3000 [09:24<00:03, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2987/3000 [09:24<00:02, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2989/3000 [09:24<00:01, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [09:24<00:01, 7.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2994/3000 [09:25<00:00, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2995/3000 [09:25<00:01, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2996/3000 [09:26<00:01, 2.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2999/3000 [09:27<00:00, 3.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|██████████| 3000/3000 [09:27<00:00, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2026-01-03 19:13:41.463\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.014, 'em': 0.014, 'acc': 0.29933333333333334}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5713de9",
"metadata": {},
"outputs": [],
"source": [
"textgrad_optimizer.save(\"./debug/textgradinfo_norman.json\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "16a8bf00",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2026-01-03 19:18:27.139\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-03 19:18:27.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_train.json ...\u001b[0m\n",
"\u001b[32m2026-01-03 19:18:27.141\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.pertqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m52\u001b[0m - \u001b[1mloading HotPotQA data from /home/tl688/pitl688/selfevolve/EvoAgentX/examples/pertqa/norman_test.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = PertQA(pertdata='norman')\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "b424bf18",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 2/3000 [00:02<55:54, 1.12s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 8/3000 [00:03<12:00, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 12/3000 [00:03<08:17, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 14/3000 [00:04<08:38, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 16/3000 [00:04<09:02, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 18/3000 [00:04<08:21, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 20/3000 [00:04<07:24, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 24/3000 [00:05<05:29, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 28/3000 [00:05<06:26, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 29/3000 [00:06<06:53, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 30/3000 [00:06<07:56, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 32/3000 [00:06<07:51, 6.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 33/3000 [00:06<08:17, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 34/3000 [00:07<10:44, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%| | 36/3000 [00:07<10:29, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%|▏ | 43/3000 [00:08<05:16, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 45/3000 [00:08<07:42, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 49/3000 [00:08<05:20, 9.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 51/3000 [00:09<06:09, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 54/3000 [00:10<09:19, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 57/3000 [00:10<06:43, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 59/3000 [00:10<06:41, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 62/3000 [00:11<10:26, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 65/3000 [00:12<07:57, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 68/3000 [00:12<05:14, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 70/3000 [00:12<06:09, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 72/3000 [00:12<05:48, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 74/3000 [00:13<10:25, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 76/3000 [00:13<10:15, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 77/3000 [00:14<11:08, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 80/3000 [00:14<08:39, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 81/3000 [00:14<07:52, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 85/3000 [00:15<05:40, 8.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 87/3000 [00:15<04:46, 10.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 89/3000 [00:15<05:19, 9.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 91/3000 [00:16<10:14, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 95/3000 [00:17<08:33, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 100/3000 [00:17<07:31, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 3%|▎ | 103/3000 [00:18<06:41, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 105/3000 [00:18<07:03, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 107/3000 [00:18<06:28, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 108/3000 [00:19<11:41, 4.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▎ | 110/3000 [00:19<11:42, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 115/3000 [00:20<06:37, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 117/3000 [00:20<05:46, 8.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 122/3000 [00:20<05:11, 9.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 125/3000 [00:21<07:01, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▍ | 127/3000 [00:21<06:41, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 128/3000 [00:22<09:39, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 129/3000 [00:22<10:26, 4.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 131/3000 [00:22<08:14, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 133/3000 [00:22<07:43, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 135/3000 [00:23<07:39, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 141/3000 [00:23<05:38, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 146/3000 [00:24<05:20, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 149/3000 [00:25<08:22, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 150/3000 [00:25<07:41, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 153/3000 [00:25<06:58, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 155/3000 [00:25<05:55, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 158/3000 [00:26<06:20, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 161/3000 [00:26<04:11, 11.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 163/3000 [00:26<04:28, 10.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 166/3000 [00:27<06:53, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 169/3000 [00:27<07:49, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 177/3000 [00:28<05:21, 8.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 179/3000 [00:29<05:48, 8.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 182/3000 [00:29<04:56, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 184/3000 [00:29<05:07, 9.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▌ | 187/3000 [00:30<07:05, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 188/3000 [00:30<08:54, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 6%|▋ | 190/3000 [00:31<09:18, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 192/3000 [00:31<08:33, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 194/3000 [00:31<08:24, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 198/3000 [00:32<06:07, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 200/3000 [00:32<08:33, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 201/3000 [00:33<09:41, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 205/3000 [00:33<06:56, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 208/3000 [00:33<06:43, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 210/3000 [00:34<08:05, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 214/3000 [00:34<05:48, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 217/3000 [00:35<07:14, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 220/3000 [00:35<04:56, 9.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 222/3000 [00:35<05:17, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 224/3000 [00:36<06:00, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 225/3000 [00:36<07:34, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 230/3000 [00:37<08:10, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 232/3000 [00:37<08:04, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 235/3000 [00:38<05:49, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 236/3000 [00:38<06:19, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 238/3000 [00:38<07:15, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 243/3000 [00:39<06:28, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 244/3000 [00:39<10:23, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 246/3000 [00:40<09:45, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 251/3000 [00:40<05:48, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 255/3000 [00:41<04:29, 10.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 257/3000 [00:41<08:39, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 259/3000 [00:42<07:41, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▊ | 261/3000 [00:42<08:20, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 265/3000 [00:43<07:28, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 269/3000 [00:43<06:26, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 271/3000 [00:43<05:48, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 274/3000 [00:44<07:11, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 276/3000 [00:45<09:14, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 279/3000 [00:45<08:05, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▉ | 283/3000 [00:46<06:15, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 287/3000 [00:46<07:23, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 290/3000 [00:47<05:29, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 294/3000 [00:47<05:35, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|▉ | 299/3000 [00:48<06:29, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 304/3000 [00:49<05:52, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 306/3000 [00:49<05:58, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 308/3000 [00:49<05:23, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 310/3000 [00:50<07:21, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 312/3000 [00:50<08:16, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 313/3000 [00:50<09:01, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 10%|█ | 315/3000 [00:51<09:40, 4.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 318/3000 [00:51<06:17, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 320/3000 [00:51<05:28, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 323/3000 [00:52<05:16, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 325/3000 [00:52<07:35, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 328/3000 [00:53<07:06, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 331/3000 [00:53<05:54, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 332/3000 [00:53<07:03, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 337/3000 [00:54<05:46, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 338/3000 [00:54<05:38, 7.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█▏ | 339/3000 [00:54<06:57, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 342/3000 [00:55<06:21, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 345/3000 [00:55<06:28, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 347/3000 [00:56<07:02, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 352/3000 [00:56<03:47, 11.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 356/3000 [00:56<05:16, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 358/3000 [00:57<05:59, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 360/3000 [00:57<05:56, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 362/3000 [00:58<07:29, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 364/3000 [00:58<05:41, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 368/3000 [00:58<06:24, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 369/3000 [00:58<06:37, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 371/3000 [00:59<06:59, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▎ | 375/3000 [00:59<04:55, 8.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 378/3000 [01:00<05:34, 7.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 379/3000 [01:00<05:29, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 381/3000 [01:00<06:34, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 382/3000 [01:00<07:10, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 383/3000 [01:01<08:08, 5.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 384/3000 [01:01<09:29, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 386/3000 [01:01<09:02, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 392/3000 [01:02<05:27, 7.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 393/3000 [01:02<05:56, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 394/3000 [01:02<08:52, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 398/3000 [01:03<05:43, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 401/3000 [01:03<04:29, 9.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 403/3000 [01:03<04:31, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 405/3000 [01:03<04:41, 9.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 407/3000 [01:04<05:16, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 409/3000 [01:04<06:44, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▎ | 412/3000 [01:05<06:34, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 415/3000 [01:05<05:38, 7.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 416/3000 [01:05<07:20, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 417/3000 [01:06<07:56, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 420/3000 [01:06<07:41, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 423/3000 [01:06<05:40, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 428/3000 [01:07<04:44, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 430/3000 [01:07<05:11, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 432/3000 [01:08<06:19, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 435/3000 [01:08<06:56, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 438/3000 [01:09<07:18, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 442/3000 [01:09<05:10, 8.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▍ | 444/3000 [01:09<05:42, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 448/3000 [01:10<05:21, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 450/3000 [01:10<05:34, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 452/3000 [01:11<05:15, 8.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 454/3000 [01:11<05:47, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 457/3000 [01:12<07:23, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▌ | 462/3000 [01:12<05:02, 8.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 464/3000 [01:12<04:40, 9.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 466/3000 [01:13<06:07, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 471/3000 [01:13<04:31, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 474/3000 [01:13<03:49, 11.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 476/3000 [01:13<03:59, 10.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 480/3000 [01:14<05:12, 8.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 483/3000 [01:15<07:11, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 485/3000 [01:15<06:26, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 486/3000 [01:15<06:49, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▋ | 488/3000 [01:16<08:06, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 490/3000 [01:16<06:07, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 493/3000 [01:16<04:39, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▋ | 495/3000 [01:16<04:33, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 498/3000 [01:17<06:06, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 499/3000 [01:18<08:45, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 503/3000 [01:18<06:23, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 505/3000 [01:18<05:12, 7.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 507/3000 [01:18<05:13, 7.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 509/3000 [01:19<04:49, 8.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 511/3000 [01:19<04:37, 8.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 514/3000 [01:20<06:39, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 518/3000 [01:20<04:24, 9.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 520/3000 [01:20<05:22, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 522/3000 [01:21<05:58, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 526/3000 [01:21<05:11, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 528/3000 [01:22<06:08, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 530/3000 [01:22<07:26, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 531/3000 [01:22<08:00, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 535/3000 [01:23<04:48, 8.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 537/3000 [01:23<05:48, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 538/3000 [01:23<05:51, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 539/3000 [01:23<08:13, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 541/3000 [01:24<09:02, 4.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 544/3000 [01:24<07:58, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 546/3000 [01:25<06:03, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 550/3000 [01:25<04:26, 9.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 552/3000 [01:25<03:59, 10.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 18%|█▊ | 554/3000 [01:26<07:50, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 556/3000 [01:26<07:49, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 557/3000 [01:27<07:58, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▊ | 562/3000 [01:27<05:33, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 564/3000 [01:28<07:23, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 567/3000 [01:28<05:30, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 568/3000 [01:28<06:43, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 573/3000 [01:28<04:13, 9.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 575/3000 [01:29<03:54, 10.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 577/3000 [01:29<04:53, 8.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 579/3000 [01:29<05:19, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 581/3000 [01:30<05:41, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 19%|█▉ | 584/3000 [01:30<06:00, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 585/3000 [01:30<07:18, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 589/3000 [01:31<06:10, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 592/3000 [01:31<04:58, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 595/3000 [01:32<04:29, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|█▉ | 599/3000 [01:33<06:21, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 602/3000 [01:33<05:22, 7.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 606/3000 [01:33<04:42, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 608/3000 [01:34<04:53, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 610/3000 [01:34<06:07, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 613/3000 [01:35<07:37, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 20%|██ | 615/3000 [01:35<08:16, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 616/3000 [01:36<10:07, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 619/3000 [01:36<06:56, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 623/3000 [01:36<04:38, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 625/3000 [01:37<04:29, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██ | 628/3000 [01:37<05:19, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 632/3000 [01:37<04:06, 9.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 634/3000 [01:38<06:44, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 638/3000 [01:39<06:05, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 641/3000 [01:39<04:37, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 21%|██▏ | 644/3000 [01:40<06:10, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 647/3000 [01:40<04:51, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 649/3000 [01:40<04:53, 8.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 651/3000 [01:41<07:32, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 656/3000 [01:41<05:07, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 658/3000 [01:41<04:57, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 659/3000 [01:42<04:55, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 663/3000 [01:42<04:12, 9.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 665/3000 [01:42<03:44, 10.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 668/3000 [01:43<06:09, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 670/3000 [01:43<07:34, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 674/3000 [01:44<05:33, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 679/3000 [01:44<03:52, 9.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 681/3000 [01:44<04:09, 9.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 683/3000 [01:45<04:36, 8.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 686/3000 [01:45<06:27, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 689/3000 [01:46<05:37, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 691/3000 [01:46<04:30, 8.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 693/3000 [01:46<04:22, 8.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 695/3000 [01:47<05:14, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 697/3000 [01:47<05:41, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 698/3000 [01:47<07:24, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 700/3000 [01:48<07:53, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 705/3000 [01:48<05:13, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▎ | 708/3000 [01:49<05:11, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 709/3000 [01:49<05:24, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 711/3000 [01:49<06:08, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 712/3000 [01:49<07:04, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 715/3000 [01:50<05:22, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 719/3000 [01:50<03:53, 9.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 721/3000 [01:50<03:56, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 724/3000 [01:51<07:08, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 728/3000 [01:52<05:15, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 731/3000 [01:52<04:55, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▍ | 732/3000 [01:52<06:08, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 735/3000 [01:53<05:28, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 737/3000 [01:53<04:54, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 738/3000 [01:53<06:51, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 742/3000 [01:54<04:55, 7.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 743/3000 [01:54<05:40, 6.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 746/3000 [01:55<06:04, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 748/3000 [01:55<05:00, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 751/3000 [01:55<04:18, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 753/3000 [01:55<04:17, 8.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 757/3000 [01:56<04:38, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 759/3000 [01:56<04:23, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 760/3000 [01:56<06:26, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 762/3000 [01:57<08:40, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 765/3000 [01:57<05:14, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 769/3000 [01:58<04:59, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 774/3000 [01:58<04:08, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 777/3000 [01:59<05:58, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 780/3000 [01:59<04:20, 8.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 782/3000 [02:00<03:51, 9.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 784/3000 [02:00<04:38, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 788/3000 [02:00<03:48, 9.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 790/3000 [02:00<04:06, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 792/3000 [02:01<05:01, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▋ | 795/3000 [02:02<07:00, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 797/3000 [02:02<05:45, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 799/3000 [02:02<05:25, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 802/3000 [02:03<05:10, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 805/3000 [02:03<04:32, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 807/3000 [02:03<03:34, 10.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 809/3000 [02:04<05:46, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 812/3000 [02:04<04:47, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 814/3000 [02:04<05:12, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 816/3000 [02:04<04:58, 7.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 818/3000 [02:05<04:51, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 819/3000 [02:05<04:54, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 820/3000 [02:05<05:57, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 821/3000 [02:05<06:18, 5.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 825/3000 [02:06<04:32, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 827/3000 [02:06<03:45, 9.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 830/3000 [02:06<04:16, 8.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 832/3000 [02:07<04:46, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 833/3000 [02:07<05:12, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 834/3000 [02:07<07:10, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 836/3000 [02:07<06:19, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 839/3000 [02:08<05:34, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 840/3000 [02:08<06:04, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 843/3000 [02:08<04:51, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 844/3000 [02:09<04:46, 7.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 848/3000 [02:09<03:49, 9.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 850/3000 [02:09<04:56, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 852/3000 [02:10<05:39, 6.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 28%|██▊ | 855/3000 [02:10<04:49, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 858/3000 [02:11<05:10, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▊ | 860/3000 [02:11<04:56, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 863/3000 [02:11<05:21, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 865/3000 [02:12<05:52, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 866/3000 [02:12<08:30, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 869/3000 [02:13<06:18, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 871/3000 [02:13<06:22, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 875/3000 [02:13<03:40, 9.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 877/3000 [02:13<03:16, 10.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 879/3000 [02:14<04:12, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 881/3000 [02:14<05:38, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 882/3000 [02:14<05:57, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 884/3000 [02:15<05:47, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 886/3000 [02:15<04:22, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 887/3000 [02:15<06:18, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 893/3000 [02:16<03:33, 9.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|██▉ | 897/3000 [02:16<04:25, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 899/3000 [02:16<03:46, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 903/3000 [02:17<04:09, 8.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 905/3000 [02:18<05:34, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 907/3000 [02:18<05:38, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 909/3000 [02:18<05:33, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|███ | 911/3000 [02:19<06:28, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 917/3000 [02:19<03:41, 9.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 919/3000 [02:19<03:19, 10.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 921/3000 [02:20<07:03, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 923/3000 [02:20<05:38, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 925/3000 [02:21<06:25, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 927/3000 [02:21<05:44, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 930/3000 [02:21<05:22, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 934/3000 [02:22<03:30, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 937/3000 [02:22<05:14, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 939/3000 [02:23<05:51, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 941/3000 [02:23<05:30, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███▏ | 943/3000 [02:23<05:02, 6.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 946/3000 [02:24<03:47, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 947/3000 [02:24<05:29, 6.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 949/3000 [02:25<07:54, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 953/3000 [02:25<05:21, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 955/3000 [02:25<04:27, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 957/3000 [02:25<03:48, 8.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 959/3000 [02:26<03:42, 9.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 962/3000 [02:26<05:21, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 963/3000 [02:27<05:52, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 964/3000 [02:27<06:21, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 971/3000 [02:28<04:30, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 973/3000 [02:28<05:57, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 976/3000 [02:29<04:40, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 978/3000 [02:29<04:36, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 979/3000 [02:29<04:59, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 983/3000 [02:29<03:35, 9.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 987/3000 [02:30<05:08, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 989/3000 [02:31<05:02, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 991/3000 [02:31<07:41, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 995/3000 [02:32<05:16, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 997/3000 [02:33<07:29, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 1002/3000 [02:33<04:45, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1006/3000 [02:34<04:39, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▎ | 1008/3000 [02:34<04:49, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▎ | 1012/3000 [02:35<04:56, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1014/3000 [02:35<05:25, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1018/3000 [02:36<04:21, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1020/3000 [02:36<05:48, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1022/3000 [02:37<06:31, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1025/3000 [02:37<06:22, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 1028/3000 [02:37<04:37, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1030/3000 [02:38<03:36, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1032/3000 [02:38<05:56, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1033/3000 [02:39<06:55, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 1034/3000 [02:39<07:54, 4.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1036/3000 [02:39<06:53, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1039/3000 [02:40<05:44, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1042/3000 [02:40<04:23, 7.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1045/3000 [02:41<05:36, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▍ | 1047/3000 [02:41<04:53, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 1048/3000 [02:41<08:27, 3.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1051/3000 [02:42<06:15, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1053/3000 [02:42<05:17, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1054/3000 [02:42<07:06, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1056/3000 [02:43<05:34, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1058/3000 [02:43<05:15, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 1059/3000 [02:43<06:46, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 1064/3000 [02:44<03:57, 8.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1066/3000 [02:45<06:36, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1067/3000 [02:45<07:42, 4.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1069/3000 [02:45<07:19, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1072/3000 [02:46<05:23, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1073/3000 [02:46<05:37, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1077/3000 [02:46<03:58, 8.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1079/3000 [02:46<04:10, 7.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1082/3000 [02:47<05:23, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1083/3000 [02:47<05:55, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 1086/3000 [02:48<05:21, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 1087/3000 [02:48<04:53, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1089/3000 [02:49<05:54, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▋ | 1091/3000 [02:49<04:23, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▋ | 1094/3000 [02:50<07:26, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1096/3000 [02:50<05:28, 5.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1098/3000 [02:50<05:46, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1100/3000 [02:51<04:59, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1104/3000 [02:51<04:03, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1107/3000 [02:51<04:31, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1109/3000 [02:52<04:54, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1110/3000 [02:53<08:46, 3.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1111/3000 [02:53<08:23, 3.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1114/3000 [02:53<06:22, 4.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1116/3000 [02:54<05:36, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1117/3000 [02:54<05:58, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1119/3000 [02:54<05:04, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 1120/3000 [02:54<05:31, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 1123/3000 [02:55<04:11, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1126/3000 [02:55<03:48, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1128/3000 [02:55<05:24, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1133/3000 [02:56<04:12, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1135/3000 [02:56<04:33, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1138/3000 [02:57<03:17, 9.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1140/3000 [02:57<03:24, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1142/3000 [02:58<06:07, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1144/3000 [02:58<07:29, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1146/3000 [02:59<05:57, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 38%|███▊ | 1149/3000 [02:59<04:27, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1152/3000 [02:59<03:05, 9.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 1154/3000 [03:00<05:48, 5.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1156/3000 [03:00<06:17, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▊ | 1158/3000 [03:01<05:14, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 1162/3000 [03:01<03:41, 8.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1163/3000 [03:01<04:50, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1166/3000 [03:02<03:54, 7.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1168/3000 [03:02<03:35, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1170/3000 [03:02<04:32, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1172/3000 [03:03<04:45, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1174/3000 [03:03<04:59, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1177/3000 [03:03<04:11, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 1180/3000 [03:04<04:03, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1181/3000 [03:04<04:17, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 1184/3000 [03:04<03:10, 9.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1185/3000 [03:04<04:11, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1190/3000 [03:05<02:54, 10.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|███▉ | 1194/3000 [03:05<02:39, 11.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1196/3000 [03:06<07:02, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 1197/3000 [03:06<06:38, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1201/3000 [03:07<04:42, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1204/3000 [03:07<03:39, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1206/3000 [03:07<03:26, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1208/3000 [03:07<03:20, 8.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1210/3000 [03:08<03:57, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|████ | 1211/3000 [03:08<03:55, 7.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 1215/3000 [03:09<05:24, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1219/3000 [03:09<04:45, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1220/3000 [03:10<04:46, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1222/3000 [03:10<04:17, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1225/3000 [03:10<03:54, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1228/3000 [03:11<03:44, 7.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 1230/3000 [03:11<04:18, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1232/3000 [03:11<05:15, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 1235/3000 [03:12<04:45, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1239/3000 [03:12<03:55, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████▏ | 1242/3000 [03:13<03:30, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 1244/3000 [03:13<03:15, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1247/3000 [03:13<04:04, 7.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1249/3000 [03:14<03:34, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1251/3000 [03:14<03:37, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1252/3000 [03:14<03:51, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1254/3000 [03:14<04:27, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1256/3000 [03:15<04:31, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1259/3000 [03:15<03:23, 8.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1261/3000 [03:15<04:29, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1263/3000 [03:16<06:17, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1266/3000 [03:16<04:09, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 1268/3000 [03:17<05:31, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1271/3000 [03:17<04:47, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 1274/3000 [03:18<03:36, 7.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1277/3000 [03:18<04:49, 5.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1280/3000 [03:19<04:01, 7.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1281/3000 [03:19<06:49, 4.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1283/3000 [03:20<06:01, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1284/3000 [03:20<05:28, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1285/3000 [03:20<05:46, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1288/3000 [03:20<04:27, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1291/3000 [03:21<03:26, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1294/3000 [03:21<03:42, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1296/3000 [03:21<04:01, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1298/3000 [03:22<04:27, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 1299/3000 [03:22<08:21, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 1302/3000 [03:23<05:54, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1307/3000 [03:23<04:09, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▎ | 1312/3000 [03:24<02:59, 9.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1314/3000 [03:24<03:06, 9.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1315/3000 [03:24<04:03, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1318/3000 [03:25<03:51, 7.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1320/3000 [03:25<03:26, 8.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1321/3000 [03:25<05:02, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1324/3000 [03:26<05:18, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1327/3000 [03:26<03:52, 7.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1328/3000 [03:26<04:35, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1330/3000 [03:27<04:57, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 1331/3000 [03:27<05:08, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 44%|████▍ | 1335/3000 [03:27<03:46, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1338/3000 [03:28<02:53, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1340/3000 [03:28<03:09, 8.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1342/3000 [03:28<03:53, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1344/3000 [03:29<05:16, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▍ | 1345/3000 [03:29<05:20, 5.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▍ | 1349/3000 [03:30<03:35, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1352/3000 [03:30<03:55, 7.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1355/3000 [03:30<03:39, 7.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1357/3000 [03:31<04:35, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1358/3000 [03:31<04:48, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 1362/3000 [03:31<03:19, 8.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 1364/3000 [03:32<03:00, 9.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1366/3000 [03:32<04:11, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1368/3000 [03:32<03:27, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1372/3000 [03:33<03:37, 7.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1373/3000 [03:34<06:09, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1375/3000 [03:34<05:18, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 1378/3000 [03:34<04:16, 6.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1380/3000 [03:34<03:32, 7.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1384/3000 [03:35<03:43, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 1387/3000 [03:35<03:36, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1389/3000 [03:36<04:10, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1390/3000 [03:36<04:10, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▋ | 1391/3000 [03:36<05:17, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 1394/3000 [03:37<04:36, 5.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1398/3000 [03:37<02:56, 9.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1400/3000 [03:37<03:12, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1401/3000 [03:38<04:37, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1402/3000 [03:38<06:35, 4.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1404/3000 [03:39<06:17, 4.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1408/3000 [03:39<04:22, 6.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1409/3000 [03:39<04:28, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 47%|████▋ | 1413/3000 [03:40<03:48, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1415/3000 [03:40<03:32, 7.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1417/3000 [03:40<02:57, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1419/3000 [03:40<02:42, 9.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1421/3000 [03:41<03:50, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1423/3000 [03:41<03:42, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 1424/3000 [03:42<04:54, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1428/3000 [03:42<03:48, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1430/3000 [03:42<03:36, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1432/3000 [03:43<04:58, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1434/3000 [03:43<05:33, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1436/3000 [03:44<04:41, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1439/3000 [03:44<03:30, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1440/3000 [03:44<04:09, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1443/3000 [03:44<03:25, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1445/3000 [03:45<04:07, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 1446/3000 [03:45<05:21, 4.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1452/3000 [03:46<04:12, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 1455/3000 [03:47<04:00, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▊ | 1459/3000 [03:47<02:48, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▊ | 1462/3000 [03:47<02:36, 9.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1464/3000 [03:48<02:41, 9.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1466/3000 [03:49<05:21, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1472/3000 [03:49<04:05, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1476/3000 [03:50<03:46, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 1479/3000 [03:50<03:07, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1481/3000 [03:51<03:50, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1482/3000 [03:51<04:39, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 1483/3000 [03:51<05:16, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1486/3000 [03:52<03:56, 6.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1488/3000 [03:52<04:26, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|████▉ | 1492/3000 [03:52<02:57, 8.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1494/3000 [03:53<03:38, 6.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1497/3000 [03:53<03:05, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 1499/3000 [03:53<03:47, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1500/3000 [03:54<04:31, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1503/3000 [03:54<04:22, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1504/3000 [03:55<05:22, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1509/3000 [03:55<03:12, 7.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 1511/3000 [03:56<04:37, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 1515/3000 [03:56<03:52, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1517/3000 [03:57<03:54, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1520/3000 [03:57<03:51, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1522/3000 [03:58<04:07, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1523/3000 [03:58<04:10, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1524/3000 [03:58<04:46, 5.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1525/3000 [03:58<05:16, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1528/3000 [03:59<03:42, 6.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 1531/3000 [03:59<03:24, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1532/3000 [03:59<03:33, 6.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 1535/3000 [04:00<03:52, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1539/3000 [04:00<03:05, 7.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 1541/3000 [04:00<02:44, 8.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 1544/3000 [04:01<03:40, 6.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1545/3000 [04:01<03:59, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1547/3000 [04:01<04:13, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1551/3000 [04:02<03:28, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1552/3000 [04:02<04:16, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1555/3000 [04:03<03:40, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1558/3000 [04:03<03:23, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1560/3000 [04:04<04:22, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1562/3000 [04:04<04:10, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1563/3000 [04:04<04:36, 5.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1567/3000 [04:05<03:25, 6.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 1570/3000 [04:05<03:37, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1572/3000 [04:05<02:52, 8.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 1574/3000 [04:06<02:34, 9.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1576/3000 [04:06<02:36, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1578/3000 [04:06<03:37, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1580/3000 [04:07<03:37, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1581/3000 [04:07<04:01, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1582/3000 [04:07<06:21, 3.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1586/3000 [04:08<03:39, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1588/3000 [04:08<03:48, 6.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1594/3000 [04:09<02:34, 9.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1596/3000 [04:09<04:11, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1598/3000 [04:10<04:13, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 53%|█████▎ | 1600/3000 [04:10<03:53, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 1601/3000 [04:10<03:51, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▎ | 1606/3000 [04:11<02:20, 9.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1609/3000 [04:11<02:00, 11.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▎ | 1611/3000 [04:11<02:43, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1613/3000 [04:11<02:37, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1615/3000 [04:12<03:46, 6.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1617/3000 [04:12<03:32, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1618/3000 [04:13<04:13, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1619/3000 [04:13<06:47, 3.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1621/3000 [04:14<07:43, 2.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 1629/3000 [04:15<02:36, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1631/3000 [04:15<02:28, 9.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 1633/3000 [04:15<03:11, 7.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1635/3000 [04:16<04:11, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1638/3000 [04:16<03:41, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1641/3000 [04:17<03:19, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▍ | 1645/3000 [04:17<02:29, 9.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1647/3000 [04:17<02:29, 9.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 1649/3000 [04:18<03:34, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1650/3000 [04:18<03:26, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1653/3000 [04:19<03:57, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1655/3000 [04:19<04:04, 5.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 1660/3000 [04:19<02:23, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 1662/3000 [04:20<03:30, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1665/3000 [04:20<03:27, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1667/3000 [04:21<03:47, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1669/3000 [04:21<04:51, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1673/3000 [04:22<03:06, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1675/3000 [04:22<02:51, 7.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1677/3000 [04:22<03:13, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1679/3000 [04:23<03:10, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1680/3000 [04:23<03:25, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1682/3000 [04:23<03:11, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 1685/3000 [04:24<03:24, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 1686/3000 [04:24<03:12, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1688/3000 [04:24<03:26, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 1690/3000 [04:24<03:48, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1691/3000 [04:25<03:51, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▋ | 1693/3000 [04:25<05:07, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1697/3000 [04:26<03:06, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1701/3000 [04:26<01:59, 10.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1704/3000 [04:26<01:33, 13.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1707/3000 [04:27<02:35, 8.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1709/3000 [04:27<04:19, 4.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1711/3000 [04:28<04:28, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1714/3000 [04:28<03:36, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1716/3000 [04:28<03:14, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1717/3000 [04:29<03:32, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 1722/3000 [04:29<02:23, 8.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 1724/3000 [04:29<02:21, 9.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1726/3000 [04:30<02:41, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1727/3000 [04:30<03:36, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1728/3000 [04:30<03:53, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1730/3000 [04:30<03:22, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1732/3000 [04:31<03:48, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1734/3000 [04:31<03:14, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1737/3000 [04:32<02:46, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1739/3000 [04:32<02:37, 8.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 1741/3000 [04:32<02:20, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1744/3000 [04:33<03:10, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1747/3000 [04:33<04:00, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1750/3000 [04:34<03:11, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 1753/3000 [04:34<03:23, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1756/3000 [04:34<02:38, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▊ | 1759/3000 [04:35<03:28, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 1760/3000 [04:35<03:20, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1763/3000 [04:36<02:38, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1765/3000 [04:36<03:56, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1768/3000 [04:37<03:04, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1770/3000 [04:37<02:50, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1773/3000 [04:37<02:08, 9.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1775/3000 [04:37<02:43, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1776/3000 [04:38<03:26, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 1778/3000 [04:38<04:11, 4.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 1781/3000 [04:39<03:07, 6.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1785/3000 [04:39<03:10, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1786/3000 [04:40<03:26, 5.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 1789/3000 [04:40<03:17, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1791/3000 [04:40<02:43, 7.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1792/3000 [04:40<03:12, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1794/3000 [04:41<03:30, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1798/3000 [04:41<03:03, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|█████▉ | 1799/3000 [04:42<03:36, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1802/3000 [04:42<02:58, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1805/3000 [04:43<02:51, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 1808/3000 [04:43<02:56, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1811/3000 [04:43<02:15, 8.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 1814/3000 [04:43<01:43, 11.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1816/3000 [04:44<02:30, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1818/3000 [04:45<03:53, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1821/3000 [04:45<03:29, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1822/3000 [04:45<04:12, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1823/3000 [04:46<04:45, 4.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1826/3000 [04:46<03:16, 5.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1828/3000 [04:46<03:04, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1830/3000 [04:47<04:02, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 1832/3000 [04:47<03:37, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1834/3000 [04:47<02:57, 6.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 1836/3000 [04:48<03:23, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1838/3000 [04:48<03:40, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1839/3000 [04:49<03:44, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1840/3000 [04:49<03:54, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████▏ | 1842/3000 [04:49<03:41, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████▏ | 1844/3000 [04:49<02:29, 7.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1845/3000 [04:50<03:16, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1847/3000 [04:50<03:34, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1849/3000 [04:50<03:24, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1852/3000 [04:51<02:51, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1854/3000 [04:51<03:23, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1856/3000 [04:52<03:35, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1857/3000 [04:52<03:47, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1859/3000 [04:52<03:07, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1863/3000 [04:52<02:23, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1866/3000 [04:53<03:48, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1868/3000 [04:54<03:03, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 1870/3000 [04:54<03:17, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 1873/3000 [04:54<03:02, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▎ | 1875/3000 [04:55<02:20, 8.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1877/3000 [04:55<02:54, 6.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1878/3000 [04:56<04:12, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1880/3000 [04:56<03:46, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1882/3000 [04:56<03:28, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1885/3000 [04:57<02:54, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1886/3000 [04:57<03:26, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1889/3000 [04:57<03:03, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1891/3000 [04:57<02:26, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1893/3000 [04:59<05:22, 3.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1895/3000 [04:59<04:13, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1896/3000 [04:59<04:20, 4.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1899/3000 [05:00<03:16, 5.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 1900/3000 [05:00<03:15, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 1904/3000 [05:00<03:17, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▎ | 1908/3000 [05:01<01:54, 9.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▎ | 1910/3000 [05:01<02:51, 6.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1913/3000 [05:02<02:34, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1914/3000 [05:02<03:11, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1916/3000 [05:03<04:17, 4.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1918/3000 [05:03<03:26, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1920/3000 [05:03<02:58, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1923/3000 [05:04<02:27, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1924/3000 [05:04<03:20, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1925/3000 [05:04<03:52, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1928/3000 [05:05<03:10, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1930/3000 [05:05<02:46, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1931/3000 [05:05<02:41, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 1934/3000 [05:06<02:47, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 1935/3000 [05:06<02:55, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1938/3000 [05:07<03:32, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1940/3000 [05:07<02:46, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1942/3000 [05:07<02:13, 7.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▍ | 1947/3000 [05:08<02:19, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▍ | 1949/3000 [05:08<03:00, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1951/3000 [05:08<02:24, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1953/3000 [05:09<03:17, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1956/3000 [05:10<03:42, 4.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 1959/3000 [05:10<03:36, 4.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 65%|██████▌ | 1962/3000 [05:11<02:20, 7.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1967/3000 [05:11<01:59, 8.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1969/3000 [05:12<02:51, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1971/3000 [05:12<03:30, 4.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 1974/3000 [05:13<02:48, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1977/3000 [05:13<02:34, 6.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1980/3000 [05:13<02:05, 8.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1982/3000 [05:14<02:03, 8.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1983/3000 [05:14<02:34, 6.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1985/3000 [05:14<02:59, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 1986/3000 [05:15<03:48, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1990/3000 [05:16<03:16, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 1992/3000 [05:16<02:50, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1996/3000 [05:16<02:12, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 1999/3000 [05:17<02:37, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2002/3000 [05:18<03:23, 4.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2005/3000 [05:18<02:42, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2008/3000 [05:19<02:27, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 2011/3000 [05:19<02:24, 6.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2013/3000 [05:19<01:51, 8.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2015/3000 [05:20<03:16, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2016/3000 [05:20<03:30, 4.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2017/3000 [05:21<04:54, 3.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 2019/3000 [05:21<04:08, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2025/3000 [05:22<02:15, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2029/3000 [05:22<01:44, 9.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2031/3000 [05:22<02:14, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2033/3000 [05:23<03:15, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2037/3000 [05:23<02:14, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2039/3000 [05:24<02:22, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2041/3000 [05:24<02:19, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2042/3000 [05:24<02:49, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2043/3000 [05:25<02:55, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2044/3000 [05:25<03:02, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 2046/3000 [05:25<03:20, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2049/3000 [05:26<02:33, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2050/3000 [05:26<02:48, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2053/3000 [05:26<02:03, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2054/3000 [05:26<02:22, 6.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 2055/3000 [05:27<02:46, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 2057/3000 [05:27<03:04, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2058/3000 [05:27<03:01, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 2060/3000 [05:28<03:11, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2064/3000 [05:28<02:18, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2067/3000 [05:28<01:35, 9.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2071/3000 [05:29<01:35, 9.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2073/3000 [05:29<02:26, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2074/3000 [05:30<02:36, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2077/3000 [05:30<02:49, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2078/3000 [05:30<02:36, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▉ | 2080/3000 [05:31<02:41, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 2083/3000 [05:31<02:15, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2085/3000 [05:31<01:55, 7.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2087/3000 [05:32<02:31, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2091/3000 [05:32<02:25, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2092/3000 [05:33<02:28, 6.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 2095/3000 [05:33<02:01, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2096/3000 [05:33<02:06, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2098/3000 [05:33<02:06, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|██████▉ | 2099/3000 [05:34<02:44, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2100/3000 [05:34<04:16, 3.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2103/3000 [05:35<02:55, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 2105/3000 [05:35<03:15, 4.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2108/3000 [05:35<01:54, 7.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2111/3000 [05:36<01:40, 8.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 2114/3000 [05:36<01:52, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2116/3000 [05:37<02:09, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2119/3000 [05:37<02:34, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2120/3000 [05:38<02:55, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2123/3000 [05:38<02:32, 5.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2124/3000 [05:38<02:33, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2126/3000 [05:39<02:28, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2127/3000 [05:39<02:56, 4.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 2130/3000 [05:39<02:01, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2133/3000 [05:39<01:21, 10.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2135/3000 [05:40<01:57, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 2137/3000 [05:40<02:00, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████▏ | 2141/3000 [05:41<01:35, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████▏ | 2143/3000 [05:41<02:02, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2146/3000 [05:41<01:55, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2148/3000 [05:42<02:24, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2151/3000 [05:42<02:01, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2153/3000 [05:42<01:37, 8.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2155/3000 [05:43<02:00, 7.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2161/3000 [05:43<01:39, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2163/3000 [05:44<03:04, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 2165/3000 [05:45<02:27, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2170/3000 [05:45<01:41, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 2172/3000 [05:45<01:49, 7.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2176/3000 [05:45<01:13, 11.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2178/3000 [05:46<01:31, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2180/3000 [05:46<02:34, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2181/3000 [05:47<03:01, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2184/3000 [05:47<02:23, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2186/3000 [05:48<02:26, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2188/3000 [05:48<02:44, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2190/3000 [05:48<02:02, 6.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2194/3000 [05:49<01:36, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2196/3000 [05:49<01:58, 6.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2197/3000 [05:49<02:02, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 2198/3000 [05:50<02:27, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2200/3000 [05:50<02:47, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 2203/3000 [05:51<02:11, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2205/3000 [05:51<01:52, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▎ | 2210/3000 [05:51<01:18, 10.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 2212/3000 [05:52<01:46, 7.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2214/3000 [05:52<01:44, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2216/3000 [05:53<02:24, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2218/3000 [05:53<02:18, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2219/3000 [05:53<02:42, 4.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2220/3000 [05:54<03:18, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2222/3000 [05:54<02:37, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2224/3000 [05:54<02:14, 5.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2227/3000 [05:55<02:03, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2228/3000 [05:55<02:12, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2231/3000 [05:56<02:41, 4.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2232/3000 [05:56<02:58, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 2233/3000 [05:56<03:11, 4.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2236/3000 [05:57<02:22, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2238/3000 [05:57<02:07, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2240/3000 [05:57<01:50, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2241/3000 [05:57<02:09, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2244/3000 [05:58<02:29, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▍ | 2247/3000 [05:59<02:14, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2248/3000 [05:59<02:36, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 2249/3000 [06:00<03:36, 3.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2254/3000 [06:00<01:40, 7.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 2256/3000 [06:00<02:01, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2261/3000 [06:01<01:26, 8.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 75%|███████▌ | 2264/3000 [06:02<02:24, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2266/3000 [06:02<01:56, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2268/3000 [06:02<02:13, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2271/3000 [06:03<02:10, 5.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2273/3000 [06:03<01:45, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2274/3000 [06:04<02:38, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2276/3000 [06:04<02:33, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2279/3000 [06:05<02:01, 5.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2281/3000 [06:05<01:42, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 2284/3000 [06:05<01:40, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2286/3000 [06:05<01:27, 8.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 2287/3000 [06:06<02:12, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2288/3000 [06:06<02:34, 4.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▋ | 2289/3000 [06:06<02:39, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▋ | 2294/3000 [06:07<01:32, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2296/3000 [06:07<01:19, 8.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2298/3000 [06:07<01:17, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2301/3000 [06:08<01:46, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2302/3000 [06:08<01:42, 6.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2303/3000 [06:08<02:59, 3.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2306/3000 [06:09<02:02, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2307/3000 [06:09<02:26, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2308/3000 [06:09<02:38, 4.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2311/3000 [06:10<01:46, 6.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2313/3000 [06:10<02:08, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2314/3000 [06:10<02:10, 5.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2315/3000 [06:11<02:26, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2316/3000 [06:11<02:50, 4.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2318/3000 [06:11<02:10, 5.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 2320/3000 [06:12<02:45, 4.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 2322/3000 [06:12<02:08, 5.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2325/3000 [06:13<02:05, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2326/3000 [06:13<01:52, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2328/3000 [06:13<01:53, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2330/3000 [06:14<02:00, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2332/3000 [06:14<02:03, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2334/3000 [06:14<01:28, 7.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2335/3000 [06:14<01:52, 5.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2337/3000 [06:15<02:37, 4.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2340/3000 [06:16<01:58, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2344/3000 [06:16<01:10, 9.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2346/3000 [06:16<01:41, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2350/3000 [06:17<01:36, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 2353/3000 [06:18<02:17, 4.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 2354/3000 [06:18<02:39, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2356/3000 [06:19<02:26, 4.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2359/3000 [06:19<01:38, 6.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▊ | 2361/3000 [06:20<02:12, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▊ | 2362/3000 [06:20<01:56, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2363/3000 [06:20<02:05, 5.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2366/3000 [06:21<02:06, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2369/3000 [06:21<01:37, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2370/3000 [06:21<01:59, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2373/3000 [06:22<01:42, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2374/3000 [06:23<03:16, 3.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 2380/3000 [06:23<01:25, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2382/3000 [06:24<02:35, 3.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 2384/3000 [06:24<02:11, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2387/3000 [06:25<02:04, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2389/3000 [06:25<01:52, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2391/3000 [06:26<01:38, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 2397/3000 [06:26<00:53, 11.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|███████▉ | 2399/3000 [06:27<01:48, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2401/3000 [06:27<02:01, 4.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2403/3000 [06:28<01:47, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2406/3000 [06:28<02:01, 4.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2408/3000 [06:29<01:42, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 2411/3000 [06:29<01:47, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 2415/3000 [06:30<01:16, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2416/3000 [06:30<01:28, 6.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2417/3000 [06:30<01:39, 5.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2418/3000 [06:30<01:51, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2422/3000 [06:31<02:04, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2425/3000 [06:32<01:21, 7.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2427/3000 [06:32<01:10, 8.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2431/3000 [06:32<01:01, 9.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 2433/3000 [06:32<01:15, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████ | 2436/3000 [06:33<02:00, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2438/3000 [06:34<02:10, 4.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 81%|████████▏ | 2442/3000 [06:34<01:12, 7.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2446/3000 [06:35<00:56, 9.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2448/3000 [06:35<01:17, 7.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2450/3000 [06:36<01:32, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2452/3000 [06:36<01:27, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2453/3000 [06:36<01:35, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2456/3000 [06:37<01:24, 6.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2458/3000 [06:37<01:12, 7.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2462/3000 [06:37<01:02, 8.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2463/3000 [06:38<01:35, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2466/3000 [06:38<01:19, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2467/3000 [06:38<01:28, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 82%|████████▏ | 2469/3000 [06:39<01:39, 5.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 2472/3000 [06:39<01:26, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▎ | 2475/3000 [06:39<01:05, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2478/3000 [06:40<01:17, 6.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2479/3000 [06:40<01:14, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2480/3000 [06:40<01:41, 5.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2481/3000 [06:41<01:42, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2483/3000 [06:41<01:54, 4.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2488/3000 [06:42<01:01, 8.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2490/3000 [06:42<01:09, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2492/3000 [06:42<01:19, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2493/3000 [06:42<01:31, 5.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2496/3000 [06:43<01:27, 5.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 2499/3000 [06:43<00:59, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2501/3000 [06:43<00:55, 9.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2502/3000 [06:44<01:22, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 83%|████████▎ | 2504/3000 [06:44<01:19, 6.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2508/3000 [06:45<01:06, 7.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▎ | 2510/3000 [06:45<01:17, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2513/3000 [06:45<01:20, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2514/3000 [06:46<01:16, 6.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2517/3000 [06:46<01:00, 7.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2518/3000 [06:46<01:06, 7.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2519/3000 [06:46<01:28, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2520/3000 [06:47<01:46, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2521/3000 [06:47<01:48, 4.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2524/3000 [06:48<01:51, 4.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2528/3000 [06:48<01:05, 7.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 2532/3000 [06:49<00:50, 9.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2534/3000 [06:49<01:03, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 2535/3000 [06:49<01:30, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2536/3000 [06:50<01:31, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2539/3000 [06:50<01:24, 5.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2541/3000 [06:51<01:24, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 2544/3000 [06:51<00:59, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▍ | 2547/3000 [06:51<01:01, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2550/3000 [06:52<00:53, 8.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2552/3000 [06:52<00:46, 9.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2557/3000 [06:53<00:56, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 85%|████████▌ | 2561/3000 [06:53<00:58, 7.57it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 2563/3000 [06:53<00:47, 9.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2565/3000 [06:54<01:17, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2567/3000 [06:54<01:20, 5.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2569/3000 [06:55<01:16, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2570/3000 [06:55<01:11, 6.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2573/3000 [06:55<00:58, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2577/3000 [06:56<00:49, 8.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2579/3000 [06:56<00:50, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2581/3000 [06:56<00:46, 9.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2583/3000 [06:57<01:25, 4.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 2586/3000 [06:57<01:12, 5.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▌ | 2587/3000 [06:57<01:07, 6.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2589/3000 [06:58<01:00, 6.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 86%|████████▋ | 2591/3000 [06:58<01:05, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▋ | 2595/3000 [06:59<00:51, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2597/3000 [06:59<01:01, 6.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2598/3000 [06:59<01:11, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2601/3000 [07:00<01:08, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2603/3000 [07:00<00:51, 7.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2607/3000 [07:01<01:22, 4.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2608/3000 [07:01<01:13, 5.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2610/3000 [07:02<01:12, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2613/3000 [07:02<00:59, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2615/3000 [07:02<00:50, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 87%|████████▋ | 2621/3000 [07:03<00:28, 13.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 2623/3000 [07:03<01:04, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2625/3000 [07:04<01:01, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2627/3000 [07:04<01:02, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2628/3000 [07:05<01:20, 4.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2629/3000 [07:05<01:23, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2633/3000 [07:05<00:55, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2636/3000 [07:05<00:48, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2637/3000 [07:06<00:49, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2641/3000 [07:06<00:50, 7.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2642/3000 [07:06<00:54, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2645/3000 [07:07<00:50, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2648/3000 [07:07<00:42, 8.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 2651/3000 [07:08<00:44, 7.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 2653/3000 [07:08<00:48, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2656/3000 [07:08<00:38, 8.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2657/3000 [07:08<00:49, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2658/3000 [07:09<01:03, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2660/3000 [07:09<01:06, 5.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2661/3000 [07:10<01:14, 4.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▊ | 2662/3000 [07:10<01:15, 4.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2664/3000 [07:10<01:17, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2667/3000 [07:11<00:56, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2668/3000 [07:11<00:53, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2669/3000 [07:11<01:20, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2671/3000 [07:12<01:11, 4.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2674/3000 [07:12<00:55, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2676/3000 [07:12<00:48, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 89%|████████▉ | 2681/3000 [07:13<00:30, 10.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 2683/3000 [07:13<00:33, 9.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2685/3000 [07:13<00:35, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2686/3000 [07:14<00:45, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2688/3000 [07:14<00:43, 7.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2691/3000 [07:15<00:53, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2693/3000 [07:15<00:58, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2695/3000 [07:15<00:52, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|████████▉ | 2697/3000 [07:16<00:48, 6.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|████████▉ | 2699/3000 [07:16<00:42, 7.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2702/3000 [07:16<00:37, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2703/3000 [07:17<01:18, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2704/3000 [07:17<01:15, 3.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2706/3000 [07:17<00:58, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 2709/3000 [07:18<00:41, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 90%|█████████ | 2711/3000 [07:18<00:40, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2716/3000 [07:18<00:24, 11.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2718/3000 [07:19<00:46, 6.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2719/3000 [07:20<01:08, 4.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2723/3000 [07:20<00:43, 6.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2726/3000 [07:20<00:30, 8.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2728/3000 [07:21<00:33, 8.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2730/3000 [07:21<00:32, 8.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2732/3000 [07:21<00:36, 7.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 2734/3000 [07:22<00:42, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2735/3000 [07:22<00:45, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████ | 2736/3000 [07:22<00:47, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2739/3000 [07:23<00:45, 5.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████▏| 2743/3000 [07:23<00:31, 8.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2745/3000 [07:23<00:29, 8.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2747/3000 [07:24<00:34, 7.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2750/3000 [07:24<00:30, 8.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2753/3000 [07:25<00:42, 5.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2754/3000 [07:25<00:40, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2755/3000 [07:25<00:44, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2758/3000 [07:25<00:33, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2759/3000 [07:25<00:32, 7.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2761/3000 [07:26<00:40, 5.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2764/3000 [07:26<00:29, 8.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 92%|█████████▏| 2768/3000 [07:27<00:40, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2771/3000 [07:27<00:26, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 2773/3000 [07:28<00:42, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2777/3000 [07:28<00:31, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2779/3000 [07:29<00:29, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2781/3000 [07:29<00:29, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2783/3000 [07:29<00:31, 6.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2785/3000 [07:30<00:33, 6.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2787/3000 [07:30<00:31, 6.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2788/3000 [07:30<00:37, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2789/3000 [07:30<00:40, 5.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2791/3000 [07:31<00:43, 4.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2795/3000 [07:31<00:30, 6.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2797/3000 [07:32<00:26, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 2800/3000 [07:32<00:24, 8.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2801/3000 [07:32<00:29, 6.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2802/3000 [07:32<00:31, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 93%|█████████▎| 2804/3000 [07:33<00:32, 5.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▎| 2805/3000 [07:33<00:58, 3.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▎| 2811/3000 [07:34<00:33, 5.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2815/3000 [07:35<00:25, 7.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2818/3000 [07:35<00:18, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2821/3000 [07:35<00:15, 11.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2823/3000 [07:36<00:37, 4.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2826/3000 [07:37<00:39, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 2830/3000 [07:37<00:25, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 94%|█████████▍| 2834/3000 [07:38<00:22, 7.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2836/3000 [07:38<00:24, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2839/3000 [07:38<00:18, 8.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2842/3000 [07:40<00:36, 4.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 2844/3000 [07:40<00:27, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▍| 2847/3000 [07:40<00:26, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2852/3000 [07:41<00:15, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2854/3000 [07:41<00:20, 7.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2856/3000 [07:42<00:25, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2857/3000 [07:42<00:26, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 95%|█████████▌| 2859/3000 [07:42<00:25, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 2861/3000 [07:43<00:24, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2865/3000 [07:43<00:19, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2867/3000 [07:44<00:19, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2868/3000 [07:44<00:18, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2870/3000 [07:44<00:18, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2872/3000 [07:44<00:18, 6.94it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2873/3000 [07:45<00:23, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2874/3000 [07:45<00:28, 4.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2877/3000 [07:45<00:19, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▌| 2881/3000 [07:46<00:12, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2883/3000 [07:46<00:13, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 2886/3000 [07:46<00:12, 9.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2888/3000 [07:46<00:11, 9.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▋| 2890/3000 [07:47<00:20, 5.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 2893/3000 [07:48<00:16, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2896/3000 [07:49<00:25, 4.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2899/3000 [07:49<00:15, 6.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metricsmetrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
" {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2903/3000 [07:49<00:12, 7.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2905/3000 [07:50<00:15, 6.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2907/3000 [07:50<00:14, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2909/3000 [07:51<00:18, 4.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2912/3000 [07:51<00:13, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2914/3000 [07:51<00:13, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 2919/3000 [07:52<00:06, 12.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2921/3000 [07:52<00:09, 8.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2923/3000 [07:52<00:10, 7.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 2924/3000 [07:53<00:11, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2927/3000 [07:53<00:14, 5.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2931/3000 [07:54<00:09, 6.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2934/3000 [07:54<00:09, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2939/3000 [07:54<00:05, 12.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2941/3000 [07:55<00:06, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2943/3000 [07:55<00:07, 7.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2945/3000 [07:56<00:08, 6.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 98%|█████████▊| 2948/3000 [07:56<00:08, 5.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2951/3000 [07:56<00:06, 8.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 2953/3000 [07:57<00:06, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▊| 2957/3000 [07:57<00:05, 7.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2959/3000 [07:57<00:04, 9.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▊| 2961/3000 [07:58<00:04, 8.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2963/3000 [07:58<00:06, 5.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2964/3000 [07:59<00:07, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2965/3000 [07:59<00:07, 4.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2966/3000 [07:59<00:07, 4.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2968/3000 [07:59<00:06, 5.23it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2970/3000 [08:00<00:04, 7.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2971/3000 [08:00<00:05, 5.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2973/3000 [08:00<00:04, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2976/3000 [08:01<00:03, 6.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2979/3000 [08:01<00:02, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 99%|█████████▉| 2982/3000 [08:01<00:02, 8.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 2984/3000 [08:02<00:02, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2985/3000 [08:02<00:02, 5.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|█████████▉| 2989/3000 [08:02<00:01, 6.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2991/3000 [08:03<00:01, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2994/3000 [08:03<00:00, 8.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2996/3000 [08:03<00:00, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2998/3000 [08:04<00:00, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 2999/3000 [08:05<00:00, 4.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 3000/3000 [08:06<00:00, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2026-01-03 19:26:42.546\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m3\u001b[0m - \u001b[1mEvaluation metrics (after optimization): {'f1': 0.087, 'em': 0.087, 'acc': 0.726}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (after optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2595c93b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a4af1fcb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b463a4a5",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import HotPotQA\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"from dotenv import load_dotenv\n",
"\n",
"from evoagentx.agents.agent_manager import AgentManager\n",
"from evoagentx.benchmark import MBPP\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.core.logging import logger\n",
"from evoagentx.evaluators import Evaluator\n",
"from evoagentx.models import OpenAILLM, OpenAILLMConfig\n",
"from evoagentx.optimizers import TextGradOptimizer\n",
"from evoagentx.prompts import StringTemplate\n",
"from evoagentx.workflow import SequentialWorkFlowGraph\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph, STRUCTUREWorkFlowGraph\n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import HumanEval,AFlowMBPP\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer, STRUCTUREOptimizer\n",
"from evoagentx.optimizers.structure_optimizer import STRUCTUREWorkFlowScheme\n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"\n",
"from evoagentx.models import OpenAILLMConfig, OpenAILLM,AzureOpenAIConfig,LiteLLMConfig,LiteLLM\n",
"from evoagentx.workflow import SEWWorkFlowGraph \n",
"from evoagentx.agents import AgentManager\n",
"from evoagentx.benchmark import MBPPPLUS, AFlowMBPPPLUS\n",
"from evoagentx.evaluators import Evaluator \n",
"from evoagentx.optimizers import SEWOptimizer \n",
"from evoagentx.core.callbacks import suppress_logger_info\n",
"from evoagentx.benchmark import HumanEvalPLUS\n",
"from evoagentx.benchmark import SciCode\n",
"from copy import deepcopy\n",
"\n",
"import nest_asyncio\n",
"nest_asyncio.apply()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "480de58c",
"metadata": {},
"outputs": [],
"source": [
"class HotPotQASplits(HotPotQA):\n",
"\n",
" def _load_data(self):\n",
" # load the original test data \n",
" super()._load_data()\n",
" # split the data into train, dev and test\n",
" import numpy as np \n",
" np.random.seed(42)\n",
" permutation = np.random.permutation(len(self._dev_data))\n",
" full_test_data = self._dev_data \n",
" # randomly select 10 samples for train, 40 for dev, and 100 for test\n",
" self._train_data = [full_test_data[idx] for idx in permutation[:50]]\n",
" self._dev_data = [full_test_data[idx] for idx in permutation[:50]]\n",
" self._test_data = [full_test_data[idx] for idx in permutation[50:550]]\n",
" self._fulldata = full_test_data\n",
"\n",
"\n",
"def collate_func(example: dict) -> dict:\n",
" context_list = []\n",
" for item in example[\"context\"]:\n",
" context = \"Title: {}\\nText: {}\".format(item[0], \" \".join([t.strip() for t in item[1]]))\n",
" context_list.append(context)\n",
" context = \"\\n\\n\".join(context_list)\n",
" problem = \"Context: {}\\n\\nQuestion: {}\\n\\nAnswer:\".format(context, example[\"question\"])\n",
" return {\"problem\": problem}\n",
"\n",
"\n",
"hotpotqa_graph_data = {\n",
" \"goal\": \"Answer the question based on the context. The answer should be a direct response to the question, without including explanations or reasoning.\",\n",
" \"tasks\": [\n",
" {\n",
" \"name\": \"answer_generate\",\n",
" \"description\": \"Answer the question based on the context.\",\n",
" \"inputs\": [\n",
" {\"name\": \"problem\", \"type\": \"str\", \"required\": True, \"description\": \"The problem to solve.\"}\n",
" ],\n",
" \"outputs\": [\n",
" {\"name\": \"answer\", \"type\": \"str\", \"required\": True, \"description\": \"The answer to the problem.\"}\n",
" ],\n",
" \"prompt_template\": StringTemplate(instruction=\"Think step by step to answer the question. You should explain your thinking process in the 'thought' field, and provide the final answer in the 'answer' field.\\nFormat your output in xml format, such as xxx and xxx.\"),\n",
" \"parse_mode\": \"xml\"\n",
" }\n",
" ] \n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "e9818857",
"metadata": {},
"outputs": [],
"source": [
"# os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
"# os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://optimizehumaneval.cognitiveservices.azure.com/\"\n",
"# os.environ[\"AZURE_OPENAI_KEY\"] = \"2b7h6anDXRsl5XHDUAGKHpjh3DLv9kLjcjGXN6PvsEmLVf1i3imMJQQJ99BKACYeBjFXJ3w3AAABACOGATqP\"\n",
"# os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
"# llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
"# azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
"# azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
"# api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"\n",
"# executor_llm = LiteLLM(config=llm_config)\n",
"# optimizer_llm = LiteLLM(config=llm_config)\n",
"\n",
"os.environ[\"AZURE_OPENAI_DEPLOYMENT_NAME\"] = \"gpt-4o-mini\"\n",
"os.environ[\"AZURE_OPENAI_ENDPOINT\"] = \"https://75244-mfztkr7x-eastus2.cognitiveservices.azure.com/\"\n",
"os.environ[\"AZURE_OPENAI_KEY\"] = \"8PNMdsUYGdMPsCfl0baO0hjtnGE2m40zJTrUGC3vKnHdpjnkOgeQJQQJ99BIACHYHv6XJ3w3AAAAACOG7VZI\"\n",
"os.environ[\"AZURE_OPENAI_API_VERSION\"] = \"2025-01-01-preview\"\n",
"llm_config = LiteLLMConfig(model=\"azure/\" + os.getenv(\"AZURE_OPENAI_DEPLOYMENT_NAME\"), # Azure model format\n",
" azure_endpoint=os.getenv(\"AZURE_OPENAI_ENDPOINT\"),\n",
" azure_key=os.getenv(\"AZURE_OPENAI_KEY\"),\n",
" api_version=os.getenv(\"AZURE_OPENAI_API_VERSION\", \"2024-12-01-preview\"), top_p=0.85, temperature=0.2, frequency_penalty=0.0, presence_penalty=0.0)\n",
"\n",
"executor_llm = LiteLLM(config=llm_config)\n",
"optimizer_llm = LiteLLM(config=llm_config)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "796739dc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[32m2025-12-09 12:45:41.759\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hotpotqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_train_v1.1.json ...\u001b[0m\n",
"\u001b[32m2025-12-09 12:45:45.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hotpotqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_dev_distractor_v1.json ...\u001b[0m\n"
]
}
],
"source": [
"benchmark = HotPotQASplits()\n",
"workflow_graph = SequentialWorkFlowGraph.from_dict(hotpotqa_graph_data)\n",
"agent_manager = AgentManager()\n",
"agent_manager.add_agents_from_workflow(workflow_graph, executor_llm.config)\n",
"\n",
"evaluator = Evaluator(\n",
" llm=executor_llm, \n",
" agent_manager=agent_manager, \n",
" collate_func=collate_func, \n",
" num_workers=20, \n",
" verbose=True\n",
")\n",
"\n",
"textgrad_optimizer = TextGradOptimizer(\n",
" graph=workflow_graph, \n",
" optimize_mode=\"all\",\n",
" executor_llm=executor_llm, \n",
" optimizer_llm=optimizer_llm,\n",
" batch_size=3,\n",
" max_steps=20,\n",
" evaluator=evaluator,\n",
" eval_every_n_steps=1,\n",
" eval_rounds=1,\n",
" save_interval=None,\n",
" save_path=\"./\",\n",
" rollback=True,\n",
" constraints=[]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "baa44bb7",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"7405"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(benchmark._fulldata)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "3ed1f571",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[32m2025-12-07 19:30:03.447\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m1\u001b[0m - \u001b[1mEvaluating workflow on test set...\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/500 [00:01<12:47, 1.54s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 6/500 [00:01<01:41, 4.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 10/500 [00:02<01:07, 7.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 2%|▏ | 12/500 [00:02<00:54, 8.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 16/500 [00:02<00:39, 12.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 19/500 [00:02<00:43, 11.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▍ | 23/500 [00:03<00:52, 9.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 25/500 [00:03<01:02, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7272727272727273, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 29/500 [00:04<00:41, 11.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 32/500 [00:04<00:38, 12.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 34/500 [00:04<00:50, 9.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4827586206896552, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 37/500 [00:04<00:49, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 39/500 [00:05<00:50, 9.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.13333333333333333, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 9%|▊ | 43/500 [00:05<00:46, 9.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 45/500 [00:05<00:47, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 48/500 [00:05<00:38, 11.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 50/500 [00:06<00:53, 8.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|█ | 52/500 [00:06<00:49, 9.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█▏ | 57/500 [00:06<00:40, 10.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 59/500 [00:07<00:43, 10.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.08695652173913045, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 12%|█▏ | 61/500 [00:07<00:49, 8.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 63/500 [00:07<00:49, 8.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 65/500 [00:07<00:50, 8.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 68/500 [00:08<00:45, 9.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 72/500 [00:08<00:45, 9.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.30769230769230765, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.28571428571428575, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 78/500 [00:08<00:24, 17.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 81/500 [01:00<34:30, 4.94s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 85/500 [01:01<20:14, 2.93s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 91/500 [01:01<09:20, 1.37s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.1111111111111111, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 94/500 [01:01<06:30, 1.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.10526315789473684, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 97/500 [01:01<04:32, 1.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 100/500 [01:02<03:50, 1.73it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 102/500 [01:03<03:16, 2.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 108/500 [01:03<01:39, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.28571428571428575, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 110/500 [01:03<01:26, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4615384615384615, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 112/500 [01:03<01:12, 5.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.9090909090909091, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 114/500 [01:04<01:09, 5.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.16666666666666669, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 116/500 [01:04<01:03, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4799999999999999, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 120/500 [01:04<00:45, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▍ | 123/500 [01:05<00:39, 9.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▌ | 127/500 [01:05<00:44, 8.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 130/500 [01:06<00:47, 7.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 132/500 [01:06<00:38, 9.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 27%|██▋ | 136/500 [01:06<00:36, 10.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5185185185185185, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 28%|██▊ | 141/500 [01:06<00:22, 15.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▊ | 143/500 [01:07<00:30, 11.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 145/500 [01:07<00:31, 11.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444444, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 147/500 [01:07<00:33, 10.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 149/500 [01:08<00:47, 7.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.21052631578947367, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 154/500 [01:08<00:38, 9.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███ | 156/500 [01:08<00:35, 9.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 161/500 [01:08<00:27, 12.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.42857142857142855, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 164/500 [01:09<00:31, 10.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 33%|███▎ | 166/500 [01:09<00:35, 9.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.17391304347826084, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 169/500 [01:10<00:39, 8.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 171/500 [01:10<00:49, 6.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 34%|███▍ | 172/500 [01:10<00:50, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7142857142857143, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▍ | 173/500 [02:01<1:08:26, 12.56s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.13333333333333333, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 36%|███▌ | 179/500 [02:01<17:33, 3.28s/it] "
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.15384615384615383, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 184/500 [02:01<08:09, 1.55s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 187/500 [02:01<05:24, 1.04s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.047619047619047616, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 189/500 [02:02<04:37, 1.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 191/500 [02:03<03:35, 1.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▉ | 196/500 [02:03<01:50, 2.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.14285714285714288, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 40%|███▉ | 199/500 [02:03<01:19, 3.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 203/500 [02:04<00:59, 4.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.09090909090909091, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 208/500 [02:04<00:38, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 42%|████▏ | 210/500 [02:05<00:41, 6.96it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 215/500 [02:05<00:26, 10.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.7777777777777778, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 217/500 [02:05<00:24, 11.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 219/500 [02:05<00:27, 10.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 221/500 [02:06<00:34, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 227/500 [02:06<00:25, 10.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 229/500 [02:06<00:26, 10.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.21428571428571425, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.15384615384615385, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 231/500 [02:07<00:31, 8.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6153846153846153, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 233/500 [02:07<00:29, 9.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 239/500 [02:07<00:22, 11.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3636363636363636, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 49%|████▉ | 244/500 [02:08<00:24, 10.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 246/500 [02:08<00:25, 9.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|████▉ | 248/500 [02:08<00:26, 9.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 50%|█████ | 251/500 [02:09<00:30, 8.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 253/500 [02:09<00:29, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████▏ | 257/500 [02:09<00:26, 9.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 52%|█████▏ | 261/500 [02:10<00:19, 12.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 263/500 [02:10<00:17, 13.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 265/500 [02:10<00:28, 8.12it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 267/500 [02:11<00:46, 5.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.061224489795918366, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 54%|█████▍ | 269/500 [03:01<25:32, 6.64s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 271/500 [03:01<16:36, 4.35s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▌ | 280/500 [03:01<04:16, 1.17s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5625, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.11764705882352941, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 283/500 [03:02<03:03, 1.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 286/500 [03:03<02:32, 1.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 58%|█████▊ | 290/500 [03:03<01:37, 2.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 59%|█████▉ | 295/500 [03:03<00:51, 3.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2222222222222222, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 297/500 [03:04<00:46, 4.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|██████ | 301/500 [03:04<00:35, 5.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 304/500 [03:05<00:31, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.9090909090909091, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 61%|██████ | 306/500 [03:05<00:30, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 308/500 [03:05<00:23, 8.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 62%|██████▏ | 310/500 [03:05<00:24, 7.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3571428571428571, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 315/500 [03:06<00:18, 10.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 319/500 [03:06<00:23, 7.67it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 322/500 [03:07<00:17, 10.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 327/500 [03:07<00:12, 13.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 329/500 [03:07<00:11, 14.37it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 66%|██████▌ | 331/500 [03:07<00:16, 10.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.25, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 67%|██████▋ | 335/500 [03:08<00:20, 8.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 337/500 [03:08<00:18, 8.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 339/500 [03:08<00:18, 8.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.27118644067796605, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▊ | 343/500 [03:09<00:15, 9.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3157894736842105, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 347/500 [03:09<00:18, 8.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.14285714285714285, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 352/500 [03:10<00:14, 10.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.048780487804878044, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 71%|███████ | 354/500 [03:10<00:16, 8.98it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 358/500 [03:10<00:14, 10.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 360/500 [03:11<00:15, 9.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.21052631578947367, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 361/500 [04:01<19:51, 8.57s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 365/500 [04:01<08:58, 3.99s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 74%|███████▍ | 371/500 [04:02<03:12, 1.50s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.05263157894736842, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 373/500 [04:02<02:28, 1.17s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 375/500 [04:02<01:55, 1.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6363636363636364, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 76%|███████▌ | 378/500 [04:03<01:14, 1.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 379/500 [04:03<01:03, 1.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 384/500 [04:03<00:26, 4.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 386/500 [04:03<00:20, 5.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 389/500 [04:04<00:18, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4347826086956522, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 391/500 [04:05<00:22, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 394/500 [04:05<00:19, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|███████▉ | 398/500 [04:05<00:12, 8.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 400/500 [04:05<00:10, 9.13it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 80%|████████ | 402/500 [04:06<00:11, 8.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 404/500 [04:06<00:09, 9.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.24000000000000002, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 406/500 [04:06<00:09, 9.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 82%|████████▏ | 409/500 [04:06<00:11, 7.89it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 83%|████████▎ | 413/500 [04:07<00:08, 9.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.09523809523809525, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 84%|████████▍ | 420/500 [04:07<00:05, 14.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 84%|████████▍ | 422/500 [04:08<00:07, 11.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▍ | 424/500 [04:08<00:08, 8.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 85%|████████▌ | 426/500 [04:08<00:09, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 86%|████████▌ | 431/500 [04:09<00:07, 9.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2666666666666667, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 433/500 [04:09<00:07, 9.35it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 87%|████████▋ | 436/500 [04:09<00:05, 10.76it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4210526315789474, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 88%|████████▊ | 440/500 [04:10<00:05, 10.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 88%|████████▊ | 442/500 [04:10<00:05, 10.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 444/500 [04:10<00:06, 8.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 89%|████████▉ | 446/500 [04:10<00:05, 9.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8235294117647058, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 90%|█████████ | 451/500 [04:11<00:04, 11.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6153846153846153, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 91%|█████████ | 455/500 [05:01<03:45, 5.02s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 91%|█████████▏| 457/500 [05:01<02:35, 3.61s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 92%|█████████▏| 462/500 [05:01<01:08, 1.80s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5263157894736842, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 93%|█████████▎| 466/500 [05:02<00:38, 1.13s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 94%|█████████▍| 471/500 [05:03<00:16, 1.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▍| 473/500 [05:03<00:12, 2.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.47619047619047616, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 475/500 [05:03<00:10, 2.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 95%|█████████▌| 477/500 [05:03<00:07, 3.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 96%|█████████▌| 479/500 [05:04<00:05, 3.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.06060606060606061, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 96%|█████████▋| 482/500 [05:04<00:03, 4.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 97%|█████████▋| 483/500 [05:05<00:04, 3.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 97%|█████████▋| 486/500 [05:05<00:02, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 488/500 [05:05<00:01, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.375, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 98%|█████████▊| 490/500 [05:05<00:01, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 494/500 [05:06<00:00, 10.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 99%|█████████▉| 496/500 [05:06<00:00, 10.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 498/500 [05:07<00:00, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 100%|█████████▉| 499/500 [05:07<00:00, 3.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.28571428571428575, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 100%|██████████| 500/500 [05:08<00:00, 1.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.1, 'em': 0.0, 'acc': 1.0}\n",
"\u001b[32m2025-12-07 19:35:11.871\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36m__main__\u001b[0m:\u001b[36m\u001b[0m:\u001b[36m4\u001b[0m - \u001b[1mEvaluation metrics (before optimization): {'f1': 0.6032480280405156, 'em': 0.428, 'acc': 0.632}\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"logger.info(\"Evaluating workflow on test set...\")\n",
"with suppress_logger_info():\n",
" results = textgrad_optimizer.evaluate(dataset=benchmark, eval_mode=\"test\")\n",
"logger.info(f\"Evaluation metrics (before optimization): {results}\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "0f33f493",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'f1': 0.6032480280405156, 'em': 0.428, 'acc': 0.632}"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"results"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "ceebbf97",
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "46206f6d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"500"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(textgrad_optimizer.evaluator._evaluation_records)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "713d6040",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 0%| | 1/500 [00:01<14:52, 1.79s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 0%| | 2/500 [00:02<07:50, 1.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 1%| | 4/500 [00:02<03:34, 2.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 1%|▏ | 7/500 [00:02<01:30, 5.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 2%|▏ | 11/500 [00:03<01:02, 7.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 13/500 [00:03<00:51, 9.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 3%|▎ | 15/500 [00:03<01:04, 7.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 4%|▎ | 18/500 [00:04<01:26, 5.59it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 4%|▍ | 20/500 [00:04<01:07, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7272727272727273, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 5%|▌ | 25/500 [00:05<01:27, 5.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 5%|▌ | 26/500 [00:05<01:22, 5.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 29/500 [00:06<01:12, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▌ | 31/500 [00:06<01:19, 5.93it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 6%|▋ | 32/500 [00:06<01:32, 5.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 7%|▋ | 35/500 [00:07<01:13, 6.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 7%|▋ | 37/500 [00:07<01:11, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 8%|▊ | 38/500 [00:07<01:42, 4.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 8%|▊ | 40/500 [00:08<01:46, 4.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.13333333333333333, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 45/500 [00:08<00:50, 8.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 9%|▉ | 47/500 [00:09<00:59, 7.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 10%|▉ | 49/500 [00:09<01:23, 5.39it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 11%|█ | 55/500 [00:10<01:19, 5.61it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 11%|█ | 56/500 [00:10<01:15, 5.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 12%|█▏ | 59/500 [00:11<01:05, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.125, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 13%|█▎ | 63/500 [00:11<00:47, 9.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 65/500 [00:12<01:07, 6.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 13%|█▎ | 66/500 [00:12<01:18, 5.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▎ | 68/500 [00:12<01:11, 6.05it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 14%|█▍ | 69/500 [00:12<01:20, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 14%|█▍ | 72/500 [00:13<01:11, 6.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 15%|█▍ | 74/500 [00:13<01:03, 6.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 15%|█▌ | 75/500 [00:13<01:02, 6.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 16%|█▌ | 78/500 [00:14<01:09, 6.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 16%|█▌ | 80/500 [00:14<01:07, 6.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 17%|█▋ | 85/500 [00:15<00:45, 9.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.30769230769230765, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.23529411764705882, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.10526315789473684, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 17%|█▋ | 87/500 [00:15<00:54, 7.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.16666666666666669, 'em': 0.0, 'acc': 0.0}\n",
"\u001b[32m2025-12-09 13:03:55.422\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mevoagentx.evaluators.evaluator\u001b[0m:\u001b[36m_evaluate_single_example\u001b[0m:\u001b[36m205\u001b[0m - \u001b[33m\u001b[1mError evaluating example and set the metrics to None:\n",
"Example: {'_id': '5ac1276c554299294b2190bc', 'answer': 'Euthanasia Program', 'question': \"Heinrich Gross participated in the Nazi regime's Aktion T4 more commonly referred to as what?\", 'supporting_facts': [['Heinrich Gross', 0], ['Aktion T4', 3]], 'context': [['Karl Freiherr Michel von Tüßling', ['Karl Freiherr Michel von Tüßling (27 July 1907 – 30 October 1991) was a Schutzstaffel (SS) officer who served in the Nazi government of German dictator Adolf Hitler and in the SS Main Office.', ' From 1936 onwards, he was the personal adjutant of \"Reichsleiter\" and SS-\"Obergruppenführer\" Philipp Bouhler, who was in charge of Hitler\\'s Chancellery (Kanzlei des Führers), head of the euthanasia programme Aktion T4, as well as co-initiator of Aktion 14f13.', ' In 1947 Tüßling provided an affidavit in defence of war criminal Viktor Brack who was sentenced to death at the Nuremberg trials.']], ['Heinrich Gross', ['Heinrich Gross (14 November 1915 – 15 December 2005) was an Austrian psychiatrist, medical doctor and neurologist, a reputed expert as a leading court-appointed psychiatrist, ill-famed for his proven involvement in the killing of at least nine children with physical, mental and/or emotional/behavioral characteristics considered \"unclean\" by the Nazi regime, under its Euthanasia Program.', ' His role in hundreds of other cases of infanticide is unclear.', \" Gross was head of the Spiegelgrund children's psychiatric clinic for two years during World War II.\"]], ['Am Spiegelgrund clinic', [\"Am Spiegelgrund was the name of a children's clinic in Vienna where 789 children were killed under the Nazi Regime Children's Euthanasia Program, also known as Aktion T4.\", ' Between 1940-1945, the clinic operated as part of the psychological institution “Am Steinhof” (renamed the Otto Wagner Clinic) on the Baumgartner Höhe, now located in Penzing, the 14th district of Vienna.', ' This clinic was divided into a reform school and a sanatorium for children, which included a so-called Children’s Ward, where sick, disabled, and otherwise ‘un-educable’ adolescents were abused and subjected to harsh medical experiments.', \" Some died by lethal injection and gas poisoning; others by disease, undernourishment, exposure to the elements, and 'accidents' relating to their conditions.\", ' The brains of up to 800 victims were preserved in jars and housed in the hospital for decades.']], ['Philipp Bouhler', ['Philipp Bouhler (11 September 1899 – 19 May 1945) was a senior Nazi Party official who was both a \"Reichsleiter\" (National Leader) and Chief of the Chancellery of the Führer of the NSDAP.', ' He was also an SS-\"Obergruppenführer\" in the \"Allgemeine SS\" who was responsible for the Nazi \"Aktion T4\" euthanasia program that killed more than 70,000 handicapped adults and children in Nazi Germany, as well as co-initiator of \"Aktion 14f13\", also called \"Sonderbehandlung\" (\"special treatment\"), that killed 15,000–20,000 concentration camp prisoners.']], ['Artur Hojan', ['Artur Hojan (7 August 1973 – found dead, 9 February 2014) was a journalist and published author specializing in the history of the Chełmno extermination camp and the Nazi involuntary euthanasia programme conducted in the territory of occupied Poland by the SS during World War\\xa0II.', ' Hojan was the co-founder of the \"Tiergartenstrasse4\" Association in 2005 (together with Cameron Munro) devoted to Aktion T4 history, with emphasis on the Kościan psychiatric hospital located where he lived.', ' Hojan, age of 40, left home in the evening of 1\\xa0December 2013 at 8\\xa0p.m. for a walk around town and disappeared.', ' His body was found two months later on 9\\xa0February 2014 floating in the Obra canal near the town of Kiełczewo, and identified later.', ' The cause of death has not been determined.', ' He was buried at the Kościan cemetery on 15\\xa0February 2014.', ' He left behind a wife and young daughter.', ' The monograph \"Treblinka Death Camp: History, Biographies, Remembrance\" by Chris Webb, the co-founder of H.E.A.R.T (also known as the HolocaustResearchProject.org), is dedicated to his memory.']], ['Aktion T4', ['Aktion T4 (German, ] ) was a postwar name for mass murder through involuntary euthanasia in Nazi Germany.', ' The name T4 is an abbreviation of \"Tiergartenstraße 4\", a street address of the Chancellery department set up in the spring of 1940, in the Berlin borough of Tiergarten, which recruited and paid personnel associated with T4.', ' Certain German physicians were authorized to select patients \"deemed incurably sick, after most critical medical examination\" and then administer to them a \"mercy death\" (\"Gnadentod\") .', ' In October 1939 Adolf Hitler signed a \"euthanasia decree\" backdated to 1 September 1939 that authorized his personal physician Karl Brandt and \"Reichsleiter\" Philipp Bouhler to implement the programme.']], ['Karl Brandt', ['Karl Brandt (January 8, 1904 – June 2, 1948) was a German physician and \"Schutzstaffel\" (SS) officer in Nazi Germany.', \" Trained in surgery, Brandt joined the Nazi Party in 1932 and became Adolf Hitler's escort physician in August 1934.\", ' A member of Hitler\\'s inner circle at the Berghof, he was selected by Philipp Bouhler, the head of Hitler\\'s Chancellery, to administer the \"Aktion T4\" euthanasia program.', ' Brandt was later appointed the Reich Commissioner of Sanitation and Health (\"Bevollmächtigter für das Sanitäts- und Gesundheitswesen\").', ' Accused of involvement in human experimentation and other war crimes, Brandt was indicted in late 1946 and faced trial before a U.S. military tribunal along with 22 others in \"United States of America v. Karl Brandt, et al\".', ' He was convicted, sentenced to death, and later hanged on June 2, 1948.']], ['Gerhard Kretschmar', ['Gerhard Herbert Kretschmar (20 February 1939 – 25 July 1939), was a German child born with severe disabilities.', \" After receiving a petition from the child's parents, the German Führer Adolf Hitler authorized one of his personal physicians, Karl Brandt, to have the child killed.\", ' This marked the beginning of the program in Nazi Germany known as a \"euthanasia program\" (Aktion T4) which ultimately resulted in the deliberate killing of about 200,000 people with mental and/or physical disabilities.']], ['Memorandum Authorizing Involuntary Euthanasia', ['Adolf Hitler signed a memorandum authorizing involuntary euthanasia in October 1939 to serve as the legal basis for Aktion T4, the Nazi forced euthanasia program.', ' Its purpose was to assure the doctors and nurses who took part in the euthanasia program would not be prosecuted for murder.', ' During the postwar trials of these same individuals, they attempted to use this decree as a justification for their actions.']], ['Dasein ohne Leben', ['Dasein ohne Leben – Psychiatrie und Menschlichkeit (\"Existence Without Life\" – \"Psychiatry and Humanity\") is a 1942 Nazi propaganda film about the physically and mentally disabled: closeups of disabled persons.', ' The director was Hermann Schwenninger, one of the three managing directors of Gemeinnützige Krankentransport (\"Charitable Ambulance\"), a front company of Aktion T4, the central institution for the mass murder of patients in the Third Reich.', ' Schwenninger also wrote parts of the screenplay of \"Ich klage an\".', \" The contract for the film came from Hitler's Chancellery, and was produced by Tobis Film.\"]]], 'type': 'bridge', 'level': 'hard'}\n",
"Error: The input to LLMOutputParser.parse should be a str, but found .\u001b[0m\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 90/500 [00:16<01:11, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 18%|█▊ | 92/500 [00:16<01:17, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▊ | 93/500 [00:17<01:45, 3.87it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 95/500 [00:17<01:18, 5.14it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 19%|█▉ | 96/500 [00:17<01:54, 3.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.27272727272727276, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|█▉ | 99/500 [00:18<01:14, 5.42it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.19999999999999998, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 20%|██ | 101/500 [00:18<01:06, 5.97it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.1739130434782609, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 103/500 [00:18<00:59, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.1111111111111111, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 104/500 [00:18<01:05, 6.08it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 21%|██ | 106/500 [00:19<01:00, 6.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.9090909090909091, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 22%|██▏ | 109/500 [00:19<01:17, 5.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 110/500 [00:20<01:11, 5.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4615384615384615, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 22%|██▏ | 112/500 [00:20<01:02, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 23%|██▎ | 113/500 [00:20<01:13, 5.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.18181818181818182, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 23%|██▎ | 117/500 [00:21<00:53, 7.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 24%|██▎ | 118/500 [00:21<01:05, 5.80it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 24%|██▍ | 120/500 [00:21<01:07, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.028169014084507043, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 25%|██▍ | 123/500 [00:22<00:53, 7.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 25%|██▌ | 125/500 [00:22<00:44, 8.47it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.7777777777777778, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 26%|██▌ | 129/500 [00:23<00:56, 6.54it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 130/500 [00:23<00:57, 6.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▌ | 131/500 [00:23<01:23, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 26%|██▋ | 132/500 [00:23<01:22, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 133/500 [00:24<01:25, 4.30it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 135/500 [00:24<01:04, 5.63it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 27%|██▋ | 136/500 [00:24<01:22, 4.41it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 29%|██▉ | 144/500 [00:25<00:30, 11.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 29%|██▉ | 146/500 [00:25<00:29, 12.01it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 30%|██▉ | 148/500 [00:26<01:00, 5.84it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 30%|███ | 152/500 [00:26<00:52, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.42857142857142855, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 31%|███ | 155/500 [00:27<01:20, 4.26it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.14285714285714288, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 31%|███▏ | 157/500 [00:27<01:01, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 32%|███▏ | 159/500 [00:28<01:04, 5.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 32%|███▏ | 162/500 [00:28<00:47, 7.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 33%|███▎ | 166/500 [00:29<00:53, 6.25it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 34%|███▍ | 170/500 [00:29<00:39, 8.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.625, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 35%|███▌ | 175/500 [00:30<00:47, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 35%|███▌ | 177/500 [00:31<01:11, 4.49it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.1081081081081081, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 36%|███▌ | 180/500 [00:31<00:52, 6.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 37%|███▋ | 183/500 [00:32<00:52, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 185/500 [00:32<00:43, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 37%|███▋ | 186/500 [00:32<00:45, 6.88it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 188/500 [00:32<00:44, 6.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 38%|███▊ | 190/500 [00:33<00:49, 6.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.08333333333333334, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 39%|███▊ | 193/500 [00:34<01:00, 5.06it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.28571428571428575, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 194/500 [00:34<00:56, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 39%|███▉ | 196/500 [00:34<01:00, 5.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 40%|████ | 200/500 [00:35<00:43, 6.90it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.14285714285714288, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 41%|████ | 204/500 [00:35<00:38, 7.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444444, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████ | 206/500 [00:36<01:00, 4.83it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.7777777777777778, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 41%|████▏ | 207/500 [00:36<01:03, 4.65it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 42%|████▏ | 210/500 [00:37<00:48, 6.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 43%|████▎ | 213/500 [00:37<00:52, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 215/500 [00:38<00:51, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 43%|████▎ | 217/500 [00:38<00:40, 7.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 219/500 [00:38<00:45, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 220/500 [00:39<00:47, 5.86it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 44%|████▍ | 222/500 [00:39<00:41, 6.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.21428571428571425, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 45%|████▌ | 225/500 [00:39<00:38, 7.07it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.15384615384615385, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 45%|████▌ | 226/500 [00:39<00:44, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6153846153846153, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 46%|████▌ | 228/500 [00:40<00:49, 5.55it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▌ | 230/500 [00:40<00:58, 4.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 46%|████▋ | 232/500 [00:41<00:51, 5.19it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 234/500 [00:41<00:40, 6.52it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 47%|████▋ | 236/500 [00:41<00:37, 6.95it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 48%|████▊ | 238/500 [00:42<00:49, 5.33it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 48%|████▊ | 242/500 [00:42<00:34, 7.46it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 245/500 [00:43<00:40, 6.27it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.14285714285714285, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 49%|████▉ | 246/500 [00:43<00:42, 6.00it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 250/500 [00:43<00:34, 7.29it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 50%|█████ | 252/500 [00:44<00:32, 7.58it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 51%|█████ | 254/500 [00:44<00:49, 4.92it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3076923076923077, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.13333333333333336, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████ | 255/500 [00:45<00:55, 4.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 51%|█████▏ | 257/500 [00:45<00:55, 4.38it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 52%|█████▏ | 260/500 [00:46<00:50, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 263/500 [00:46<00:32, 7.22it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.09302325581395349, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 264/500 [00:46<00:34, 6.91it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 266/500 [00:46<00:32, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 53%|█████▎ | 267/500 [00:47<00:46, 5.03it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 270/500 [00:47<00:37, 6.15it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 54%|█████▍ | 272/500 [00:47<00:36, 6.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▍ | 273/500 [00:48<00:41, 5.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 55%|█████▌ | 276/500 [00:48<00:33, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5625, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 55%|█████▌ | 277/500 [00:48<00:36, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 56%|█████▌ | 279/500 [00:48<00:32, 6.74it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 56%|█████▋ | 282/500 [00:49<00:28, 7.77it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 57%|█████▋ | 284/500 [00:49<00:24, 8.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 57%|█████▋ | 287/500 [00:50<00:33, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 289/500 [00:50<00:45, 4.60it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.23529411764705882, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 58%|█████▊ | 292/500 [00:51<00:32, 6.43it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.2857142857142857, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▊ | 293/500 [00:51<00:37, 5.48it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 295/500 [00:51<00:32, 6.21it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 59%|█████▉ | 296/500 [00:51<00:42, 4.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 60%|█████▉ | 299/500 [00:52<00:42, 4.71it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.625, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 60%|██████ | 302/500 [00:52<00:29, 6.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 305/500 [00:53<00:29, 6.69it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 61%|██████ | 306/500 [00:53<00:34, 5.66it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 62%|██████▏ | 310/500 [00:54<00:34, 5.50it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.3571428571428571, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.34285714285714286, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 63%|██████▎ | 313/500 [00:54<00:29, 6.36it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 63%|██████▎ | 316/500 [00:55<00:19, 9.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 64%|██████▍ | 320/500 [00:55<00:23, 7.79it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 64%|██████▍ | 322/500 [00:56<00:43, 4.10it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 65%|██████▌ | 325/500 [00:57<00:33, 5.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▌ | 328/500 [00:57<00:27, 6.17it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5714285714285715, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 66%|██████▋ | 332/500 [00:58<00:24, 6.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3157894736842105, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 335/500 [00:58<00:25, 6.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 67%|██████▋ | 336/500 [00:58<00:29, 5.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 68%|██████▊ | 340/500 [00:59<00:22, 7.02it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 68%|██████▊ | 341/500 [00:59<00:26, 5.99it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 69%|██████▊ | 343/500 [00:59<00:23, 6.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.23529411764705882, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 69%|██████▉ | 346/500 [01:00<00:25, 6.09it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|██████▉ | 349/500 [01:01<00:26, 5.78it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.3333333333333333, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 70%|███████ | 351/500 [01:01<00:33, 4.40it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.33333333333333337, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 70%|███████ | 352/500 [01:01<00:31, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 71%|███████ | 356/500 [01:02<00:20, 7.20it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 72%|███████▏ | 358/500 [01:02<00:21, 6.53it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 359/500 [01:02<00:19, 7.11it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 361/500 [01:03<00:28, 4.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 72%|███████▏ | 362/500 [01:03<00:29, 4.75it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 73%|███████▎ | 364/500 [01:04<00:26, 5.18it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8571428571428571, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.048780487804878044, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 365/500 [01:04<00:29, 4.51it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 73%|███████▎ | 367/500 [01:04<00:24, 5.32it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.8, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▎ | 368/500 [01:04<00:27, 4.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 74%|███████▍ | 370/500 [01:05<00:23, 5.56it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▍ | 374/500 [01:05<00:14, 8.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 375/500 [01:05<00:22, 5.62it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.125, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 75%|███████▌ | 377/500 [01:06<00:21, 5.72it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 379/500 [01:06<00:20, 5.82it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 76%|███████▌ | 380/500 [01:06<00:22, 5.44it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 0.06451612903225806, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 77%|███████▋ | 384/500 [01:07<00:17, 6.64it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.75, 'em': 0.0, 'acc': 0.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 77%|███████▋ | 385/500 [01:07<00:22, 5.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 78%|███████▊ | 388/500 [01:08<00:17, 6.31it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.5, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 389/500 [01:08<00:21, 5.28it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6666666666666666, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 0.4444444444444445, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 391/500 [01:08<00:17, 6.16it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 78%|███████▊ | 392/500 [01:09<00:24, 4.45it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 79%|███████▉ | 395/500 [01:09<00:18, 5.70it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.10526315789473684, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 79%|███████▉ | 396/500 [01:09<00:20, 5.04it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Evaluating workflow: 80%|████████ | 400/500 [01:10<00:17, 5.68it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n",
"metrics {'f1': 0.5454545454545454, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 403/500 [01:10<00:12, 7.85it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.6363636363636364, 'em': 0.0, 'acc': 1.0}\n",
"metrics {'f1': 1.0, 'em': 1.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\r",
"Evaluating workflow: 81%|████████ | 405/500 [01:10<00:12, 7.34it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"metrics {'f1': 0.41379310344827586, 'em': 0.0, 'acc': 1.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882831.900972921)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Evaluating workflow: 81%|████████ | 406/500 [01:11<00:19, 4.88it/s]Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882838.278584408)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882832.884850688)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882831.427523242)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882833.9227552)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882833.485776952)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882833.419836726)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882833.734559556)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882836.545759743)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882838.642686889)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882835.569353769)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882835.45715636)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882835.484050402)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882835.978691092)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882836.362148808)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882839.550886922)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882843.748531394)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882839.236639148)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882839.608670356)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882839.61855446)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882838.926174395)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882839.920571996)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882842.418300232)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882845.62442739)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882844.463479863)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882844.491282685)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed connector\n",
"connections: ['deque([(, 9882842.748177644)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882842.367578967)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882841.798520438)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882842.813740004)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882842.62341245)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882844.99469701)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882846.026654517)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882846.658695927)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882845.31032316)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882844.703264192)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882844.08250844)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882848.343938269)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882848.521123527)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882847.712289272)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882847.97851446)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882848.871845668)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882852.710383805)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882851.40044568)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882852.974455036)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882854.090643408)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882852.663472926)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882851.131142477)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882856.114402125)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882855.69037116)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882854.83743494)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882854.435514037)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882857.476258684)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882856.70534608)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882861.13784732)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882859.93803291)])']\n",
"connector: \n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882860.098073509)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882859.607659576)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882859.847287528)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882864.1207297)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882866.442518309)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882873.879695881)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882864.060176007)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882862.26817764)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882865.285610208)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882865.184962856)])']\n",
"connector: \n",
"Unclosed connector\n",
"connections: ['deque([(, 9882866.705135787)])']\n",
"connector: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: \n",
"Unclosed client session\n",
"client_session: