File size: 16,599 Bytes

5374a2d

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e2d3caf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import glob\n",
    "import os\n",
    "import pickle\n",
    "from getpass import getpass\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import requests\n",
    "from dotenv import load_dotenv\n",
    "from tqdm import tqdm\n",
    "\n",
    "# SECURITY: the OpenAI API key must never be stored in the notebook itself.\n",
    "# Any key that was previously hardcoded in this cell is compromised and must be\n",
    "# revoked and rotated in the OpenAI dashboard.\n",
    "# Resolution order: existing environment variable -> local .env file -> interactive prompt.\n",
    "load_dotenv()  # does not override variables that are already set\n",
    "if not os.environ.get(\"OPENAI_API_KEY\"):\n",
    "    # getpass keeps the typed key out of the cell source and the saved outputs.\n",
    "    os.environ[\"OPENAI_API_KEY\"] = getpass(\"OpenAI API key: \")\n",
    "# Kept for backward compatibility with code that reads the `api_key` global.\n",
    "api_key = os.environ[\"OPENAI_API_KEY\"]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f870b639",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from evoagentx.optimizers import AFlowOptimizer\n",
    "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n",
    "from evoagentx.benchmark import AFlowHumanEval\n",
    "\n",
    "# Load environment variables (presumably from a local .env file via python-dotenv),\n",
    "# then read the OpenAI key that the LLM configs in the next cell consume.\n",
    "# os.getenv returns None when the variable is unset, so a missing key is not detected here.\n",
    "load_dotenv()\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
    "# Only needed if the (currently disabled) Claude optimizer config is re-enabled:\n",
    "# ANTHROPIC_API_KEY = os.getenv(\"ANTHROPIC_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1f3dd892",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative optimizer LLM (Claude 3.5 Sonnet) -- currently disabled.\n",
    "# Re-enabling it also requires ANTHROPIC_API_KEY to be loaded in the previous cell.\n",
    "# claude_config = LiteLLMConfig(\n",
    "#     model=\"anthropic/claude-3-5-sonnet-20240620\", \n",
    "#     anthropic_key=ANTHROPIC_API_KEY\n",
    "# )\n",
    "# optimizer_llm = LiteLLM(config=claude_config)\n",
    "\n",
    "# Configure the executor LLM (GPT-4o-mini)\n",
    "openai_config = OpenAILLMConfig(\n",
    "    model=\"gpt-4o-mini\", \n",
    "    openai_key=OPENAI_API_KEY\n",
    ")\n",
    "\n",
    "# Configure the optimizer LLM.\n",
    "# NOTE(review): despite its name, `claude_config` targets GPT-4o-mini through LiteLLM\n",
    "# with the OpenAI key, so optimizer and executor currently use the same model.\n",
    "claude_config = LiteLLMConfig(\n",
    "    model=\"gpt-4o-mini\", \n",
    "    openai_key=OPENAI_API_KEY\n",
    ")\n",
    "# Both handles are passed to AFlowOptimizer further down the notebook.\n",
    "executor_llm = OpenAILLM(config=openai_config)\n",
    "optimizer_llm = LiteLLM(config=claude_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a87feb08",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _task_config(question_type, *operators):\n",
    "    \"\"\"Build one benchmark entry: its question type plus a fresh list of operator names.\"\"\"\n",
    "    return {\"question_type\": question_type, \"operators\": list(operators)}\n",
    "\n",
    "\n",
    "# Per-benchmark settings; one entry is unpacked into AFlowOptimizer(**...) below.\n",
    "EXPERIMENTAL_CONFIG = {\n",
    "    \"humaneval\": _task_config(\"code\", \"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"),\n",
    "    \"mbpp\": _task_config(\"code\", \"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"),\n",
    "    \"hotpotqa\": _task_config(\"qa\", \"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"),\n",
    "    \"gsm8k\": _task_config(\"math\", \"Custom\", \"ScEnsemble\", \"Programmer\"),\n",
    "    \"math\": _task_config(\"math\", \"Custom\", \"ScEnsemble\", \"Programmer\"),\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b6054068",
   "metadata": {},
   "outputs": [],
   "source": [
    "import evoagentx.workflow.operators as operator\n",
    "import examples.aflow.code_generation.prompt as prompt_custom # noqa: F401\n",
    "from evoagentx.models.model_configs import LLMConfig\n",
    "from evoagentx.benchmark.benchmark import Benchmark\n",
    "from evoagentx.models.model_utils import create_llm_instance\n",
    "\n",
    "class Workflow:\n",
    "    \"\"\"Single-step code-generation workflow built on the AFlow operators.\n",
    "\n",
    "    Holds an LLM instance created from ``llm_config`` together with two operators\n",
    "    bound to it: ``custom`` (free-form generation, unused by ``__call__``) and\n",
    "    ``custom_code_generate`` (code generation for a given entry point).\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, name: str, llm_config: LLMConfig, benchmark: Benchmark):\n",
    "        self.name = name\n",
    "        self.benchmark = benchmark\n",
    "        self.llm = create_llm_instance(llm_config)\n",
    "        # Operators share the workflow's LLM instance.\n",
    "        self.custom = operator.Custom(self.llm)\n",
    "        self.custom_code_generate = operator.CustomCodeGenerate(self.llm)\n",
    "\n",
    "    async def __call__(self, problem: str, entry_point: str):\n",
    "        \"\"\"Generate a solution for ``problem`` and return the operator's 'response' field.\n",
    "\n",
    "        ``self.custom`` is available for arbitrary extra generation steps; code that\n",
    "        must come back in standard form goes through ``self.custom_code_generate``.\n",
    "        \"\"\"\n",
    "        generated = await self.custom_code_generate(\n",
    "            problem=problem,\n",
    "            entry_point=entry_point,\n",
    "            instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT,\n",
    "        )\n",
    "        return generated['response']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "27e574ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:04:04.523\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading train data from None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:04.524\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m185\u001b[0m - \u001b[1mLoading dev data from humaneval_validate.jsonl\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:04.525\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m188\u001b[0m - \u001b[1mLoading test data from humaneval_test.jsonl\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Initialize the benchmark\n",
    "humaneval = AFlowHumanEval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2f8da181",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The optimizer appears to import each round's graph by turning these paths into dotted\n",
    "# module names: the previous \"../examples/...\" values failed in every round with\n",
    "# \"No module named '.'\". Use repo-root-relative paths without \"..\" components instead.\n",
    "# If the notebook was started one level below the repository root, move up first so the\n",
    "# same strings are also valid filesystem paths (idempotent when the cell is re-run).\n",
    "if not os.path.isdir(\"examples\") and os.path.isdir(os.path.join(\"..\", \"examples\")):\n",
    "    os.chdir(\"..\")\n",
    "\n",
    "optimizer = AFlowOptimizer(\n",
    "    graph_path=\"examples/aflow/code_generation\",  # Path to the initial workflow graph\n",
    "    optimized_path=\"examples/aflow/humaneval/optimized\",  # Path to save optimized workflows\n",
    "    optimizer_llm=optimizer_llm,  # LLM for optimization\n",
    "    executor_llm=executor_llm,    # LLM for execution\n",
    "    validation_rounds=3,          # Number of times to run validation on the development set during optimization\n",
    "    eval_rounds=3,               # Number of times to run evaluation on the test set during testing\n",
    "    max_rounds=20,               # Maximum optimization rounds\n",
    "    **EXPERIMENTAL_CONFIG[\"humaneval\"]  # Task-specific configuration, used to specify the task type and available operators\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "74937699",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Patch asyncio before running the optimizer.\n",
    "# NOTE(review): presumably needed because the Jupyter kernel already runs an event loop\n",
    "# and the optimizer drives async workflows from synchronous calls -- confirm.\n",
    "import nest_asyncio\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98ac4a63",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:04:50.304\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:50.305\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 1/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:55.310\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:04:55.311\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 2/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.utils.aflow_utils.graph_utils\u001b[0m:\u001b[36mload_graph\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mError loading graph for round 0: No module named '.'\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: No module named '.'. Retrying... (Attempt 3/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.322\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mMax retries reached.\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.323\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mScore for round 1: None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:05.326\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 1/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:10.332\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 2/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.344\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 3/3)\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.344\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m149\u001b[0m - \u001b[1mMax retries reached.\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.345\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36moptimize\u001b[0m:\u001b[36m112\u001b[0m - \u001b[1mScore for round 2: None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:05:20.347\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.optimizers.aflow_optimizer\u001b[0m:\u001b[36m_execute_with_retry\u001b[0m:\u001b[36m147\u001b[0m - \u001b[1mError occurred: 'round'. Retrying... (Attempt 1/3)\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Optimize the workflow\n",
    "optimizer.optimize(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1010d583",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Evaluate on the benchmark's test split (repeated `eval_rounds` times, per the optimizer config above).\n",
    "optimizer.test(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "becb5a82",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5c076d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the SciCode dev split (JSON Lines, one record per line).\n",
    "# Set the SCICODE_DEV_PATH environment variable to override it; the default is the\n",
    "# original author's machine-specific path and will not exist elsewhere.\n",
    "SCICODE_DEV_PATH = os.environ.get(\n",
    "    \"SCICODE_DEV_PATH\",\n",
    "    \"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\",\n",
    ")\n",
    "df = pd.read_json(SCICODE_DEV_PATH, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "481602a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'def get_alpha(recvec, alpha_scaling=5):\\n    \"\"\"\\n    Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    \"\"\"\\n    alpha = alpha_scaling * np.max(np.linalg.norm(recvec, axis=1))\\n    return alpha'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['ground_truth_code'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ffb0be7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n    '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    '''\""
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['function_header'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "69acf613",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'import numpy as np\\nfrom scipy.special import erfc'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['required_dependencies'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b5696e0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['step_number', 'step_description_prompt', 'step_background',\n",
       "       'ground_truth_code', 'function_header', 'test_cases', 'return_line',\n",
       "       'required_dependencies'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "0a3085a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n    '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    '''\""
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['function_header'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "e6a76c86",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"ref1 = -1.74756\\nEX1 = {\\n    'latvec': np.array([\\n        [0.0, 1.0, 1.0],\\n        [1.0, 0.0, 1.0],\\n        [1.0, 1.0, 0.0]\\n        ]),\\n    'atom_charges': np.array([1]),\\n    'atom_coords': np.array([\\n        [0.0, 0.0, 0.0]\\n        ]),\\n    'configs': np.array([\\n        [1.0, 1.0, 1.0]\\n    ]),\\n}\\nassert np.allclose(get_alpha(np.linalg.inv(EX1['latvec']).T), target)\""
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['test_cases'].values[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "99775141",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}