{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e2d3caf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import getpass\n",
    "import glob\n",
    "import os\n",
    "import pickle\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from tqdm import tqdm\n",
    "\n",
    "# OpenAI API Key\n",
    "# Credentials must never be stored in the notebook itself: take the key from the\n",
    "# environment (optionally populated from a local .env file) and only prompt for\n",
    "# it interactively when it is missing.\n",
    "# NOTE(review): an earlier revision of this cell embedded a literal key; treat\n",
    "# that key as compromised and revoke it.\n",
    "load_dotenv()\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass.getpass(\"OpenAI API key: \")\n",
    "# Export the key so libraries that read OPENAI_API_KEY from the environment see it.\n",
    "os.environ[\"OPENAI_API_KEY\"] = api_key\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f870b639",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from evoagentx.optimizers import AFlowOptimizer\n",
    "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n",
    "from evoagentx.benchmark import AFlowHumanEval\n",
    "\n",
    "# Load environment variables\n",
    "load_dotenv()\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
    "# ANTHROPIC_API_KEY = os.getenv(\"ANTHROPIC_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1f3dd892",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Configure the optimizer LLM (Claude 3.5 Sonnet)\n",
    "# claude_config = LiteLLMConfig(\n",
    "# model=\"anthropic/claude-3-5-sonnet-20240620\", \n",
    "# anthropic_key=ANTHROPIC_API_KEY\n",
    "# )\n",
    "# optimizer_llm = LiteLLM(config=claude_config)\n",
    "\n",
    "# Configure the executor LLM (GPT-4o-mini)\n",
    "openai_config = OpenAILLMConfig(\n",
    "    model=\"gpt-4o-mini\", \n",
    "    openai_key=OPENAI_API_KEY\n",
    ")\n",
    "\n",
    "claude_config = LiteLLMConfig(\n",
    " 
model=\"gpt-4o-mini\", \n",
    "    openai_key=OPENAI_API_KEY\n",
    ")\n",
    "# NOTE: claude_config is a LiteLLMConfig that also targets gpt-4o-mini (the Claude\n",
    "# setup above is commented out), so the optimizer and executor use the same model.\n",
    "executor_llm = OpenAILLM(config=openai_config)\n",
    "optimizer_llm = LiteLLM(config=claude_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a87feb08",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-benchmark optimizer settings: the question type and the operator names\n",
    "# made available for that task. Unpacked into AFlowOptimizer(**...) below.\n",
    "EXPERIMENTAL_CONFIG = {\n",
    "    \"humaneval\": {\n",
    "        \"question_type\": \"code\",\n",
    "        \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"]\n",
    "    },\n",
    "    \"mbpp\": {\n",
    "        \"question_type\": \"code\",\n",
    "        \"operators\": [\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"]\n",
    "    },\n",
    "    \"hotpotqa\": {\n",
    "        \"question_type\": \"qa\",\n",
    "        \"operators\": [\"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"]\n",
    "    },\n",
    "    \"gsm8k\": {\n",
    "        \"question_type\": \"math\",\n",
    "        \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n",
    "    },\n",
    "    \"math\": {\n",
    "        \"question_type\": \"math\",\n",
    "        \"operators\": [\"Custom\", \"ScEnsemble\", \"Programmer\"]\n",
    "    }\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b6054068",
   "metadata": {},
   "outputs": [],
   "source": [
    "import evoagentx.workflow.operators as operator\n",
    "import examples.aflow.code_generation.prompt as prompt_custom # noqa: F401\n",
    "from evoagentx.models.model_configs import LLMConfig\n",
    "from evoagentx.benchmark.benchmark import Benchmark\n",
    "from evoagentx.models.model_utils import create_llm_instance\n",
    "\n",
    "\n",
    "class Workflow:\n",
    "    \"\"\"Baseline code-generation workflow made of a single CustomCodeGenerate call.\n",
    "\n",
    "    Attributes:\n",
    "        name: Identifier given to this workflow.\n",
    "        llm: LLM instance created from ``llm_config``.\n",
    "        benchmark: Benchmark object the workflow was constructed with.\n",
    "        custom: ``operator.Custom`` bound to ``llm`` (not called by ``__call__`` yet).\n",
    "        custom_code_generate: ``operator.CustomCodeGenerate`` bound to ``llm``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(\n",
    "        self,\n",
    "        name: str,\n",
    "        llm_config: LLMConfig,\n",
    "        benchmark: Benchmark\n",
    "    ):\n",
    "        \"\"\"Create the LLM from ``llm_config`` and bind both operators to it.\"\"\"\n",
    "        self.name = name\n",
    "        self.llm = create_llm_instance(llm_config)\n",
    "        self.benchmark = benchmark\n",
    "        self.custom = operator.Custom(self.llm)\n",
    "        self.custom_code_generate = operator.CustomCodeGenerate(self.llm)\n",
    "\n",
    "    async def __call__(self, problem: str, entry_point: str):\n",
    "        \"\"\"\n",
    "        Implementation of the workflow\n",
    "        Custom operator to generate anything you want.\n",
    "        But when you want to get standard code, you should use custom_code_generate operator.\n",
    "\n",
    "        Args:\n",
    "            problem: Problem statement forwarded to the code-generation operator.\n",
    "            entry_point: Forwarded unchanged to the operator (presumably the name of\n",
    "                the function to implement - confirm against the operator).\n",
    "\n",
    "        Returns:\n",
    "            The value stored under the 'response' key of the operator's result.\n",
    "        \"\"\"\n",
    "        # await self.custom(input=, instruction=\"\")\n",
    "        solution = await self.custom_code_generate(\n",
    "            problem=problem,\n",
    "            entry_point=entry_point,\n",
    "            instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT,\n",
    "        )\n",
    "        return solution['response']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "27e574ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:04:04.523\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading train data from None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:04.524\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m185\u001b[0m - \u001b[1mLoading dev data from humaneval_validate.jsonl\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:04.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m188\u001b[0m - \u001b[1mLoading test data from humaneval_test.jsonl\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Initialize the benchmark\n",
    "humaneval = AFlowHumanEval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2f8da181",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from pathlib import Path\n",
    "\n",
    "# A previous run with \"../examples/...\" paths failed in load_graph with\n",
    "# \"No module named '.'\", which is consistent with the path being turned into a\n",
    "# dotted module name (TODO confirm against evoagentx.utils.aflow_utils.graph_utils).\n",
    "# Work from the repository root and pass root-relative paths instead.\n",
    "# NOTE(review): assumes this notebook lives one level below the repository root.\n",
    "if not Path(\"examples/aflow/code_generation\").is_dir() and Path(\"../examples/aflow/code_generation\").is_dir():\n",
    "    os.chdir(\"..\")\n",
    "if os.getcwd() not in sys.path:\n",
    "    sys.path.insert(0, os.getcwd())  # make the `examples` package importable from here\n",
    "\n",
    "optimizer = AFlowOptimizer(\n",
    "    graph_path=\"examples/aflow/code_generation\", # Path to the initial workflow graph\n",
    "    optimized_path=\"examples/aflow/humaneval/optimized\", # Path to save optimized workflows\n",
    "    optimizer_llm=optimizer_llm, # LLM for optimization\n",
    "    executor_llm=executor_llm, # LLM for execution\n",
    "    validation_rounds=3, # Number of times to run validation on the development set during optimization\n",
    "    eval_rounds=3, # Number of times to run evaluation on the test set during testing\n",
    "    max_rounds=20, # 
Maximum optimization rounds\n",
    "    **EXPERIMENTAL_CONFIG[\"humaneval\"] # Task-specific configuration, used to specify the task type and available operators\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "74937699",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Patch asyncio so an event loop can be re-entered inside the notebook kernel.\n",
    "# NOTE(review): presumably needed because optimizer.optimize()/test() drive their\n",
    "# own event loop - confirm.\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98ac4a63",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:04:50.304\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:50.305\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 1/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:55.310\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:55.311\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 2/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 3/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mMax retries reached.\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mScore for round 1: None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.326\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 1/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:10.332\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 2/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 3/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.344\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mMax retries reached.\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.345\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mScore for round 2: None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 1/3)\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Optimize the workflow\n",
    "# NOTE: the stored output below is from a run in which every round failed to load\n",
    "# its graph and scored None; it does not reflect a successful optimization.\n",
    "optimizer.optimize(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1010d583",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Evaluate with the optimizer's test() entry point on the same benchmark object.\n",
    "optimizer.test(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "becb5a82",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5c076d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Location of the SciCode dev split (JSON Lines, one record per line).\n",
    "# Set the SCICODE_DEV_PATH environment variable to point at a local copy; the\n",
    "# fallback is the original machine-specific location.\n",
    "SCICODE_DEV_PATH = os.environ.get(\n",
    "    \"SCICODE_DEV_PATH\",\n",
    "    \"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\",\n",
    ")\n",
    "df = pd.read_json(SCICODE_DEV_PATH, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "481602a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'def get_alpha(recvec, alpha_scaling=5):\\n \"\"\"\\n Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n \"\"\"\\n alpha = alpha_scaling * np.max(np.linalg.norm(recvec, axis=1))\\n return alpha'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reference implementation stored for the first record.\n",
    "df['ground_truth_code'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ffb0be7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n '''\""
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Signature plus docstring (no body) for the first record.\n",
    "df['function_header'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "69acf613",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'import numpy as np\\nfrom scipy.special import erfc'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Import lines stored alongside the first record.\n",
    "df['required_dependencies'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b5696e0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['step_number', 'step_description_prompt', 'step_background',\n",
       " 'ground_truth_code', 'function_header', 'test_cases', 'return_line',\n",
       " 'required_dependencies'],\n",
       " dtype='object')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Columns available in the dataset.\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "0a3085a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n Parameters:\\n recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n Returns:\\n float: The calculated alpha value.\\n '''\""
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Repeats the function_header lookup for the first record.\n",
    "df['function_header'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "e6a76c86",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"ref1 = -1.74756\\nEX1 = {\\n 'latvec': np.array([\\n [0.0, 1.0, 1.0],\\n [1.0, 0.0, 1.0],\\n [1.0, 1.0, 0.0]\\n ]),\\n 'atom_charges': np.array([1]),\\n 'atom_coords': np.array([\\n [0.0, 0.0, 0.0]\\n ]),\\n 'configs': np.array([\\n [1.0, 1.0, 1.0]\\n ]),\\n}\\nassert np.allclose(get_alpha(np.linalg.inv(EX1['latvec']).T), target)\""
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: the snippet shown below asserts against `target`, which the snippet itself\n",
    "# never defines (it only assigns `ref1`) - presumably supplied by the evaluation\n",
    "# harness; confirm before executing these test cases.\n",
    "df['test_cases'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99775141",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}