File size: 28,510 Bytes

5374a2d

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e2d3caf8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import glob\n",
    "import pandas as pd\n",
    "import glob\n",
    "from tqdm import tqdm\n",
    "import base64\n",
    "import requests\n",
    "# OpenAI API key: read it from a local .env file or the environment, and only\n",
    "# prompt for it when neither provides one. Never hardcode the secret in the\n",
    "# notebook -- a key that has been committed must be revoked and replaced.\n",
    "import os\n",
    "from getpass import getpass\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"OpenAI API key: \")\n",
    "os.environ[\"OPENAI_API_KEY\"] = api_key\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "f870b639",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/gpfs/radev/home/tl688/.conda/envs/evoagentx/lib/python3.11/site-packages/PyPDF2/__init__.py:21: DeprecationWarning: PyPDF2 is deprecated. Please move to the pypdf library instead.\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from evoagentx.optimizers import AFlowOptimizer\n",
    "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n",
    "from evoagentx.benchmark import AFlowHumanEval\n",
    "\n",
    "# Merge any variables declared in a local .env file into the process environment,\n",
    "# then pick up the OpenAI credential (None when it is not set anywhere).\n",
    "load_dotenv()\n",
    "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\")\n",
    "# Uncomment when the optimizer should use an Anthropic model instead:\n",
    "# ANTHROPIC_API_KEY = os.environ.get(\"ANTHROPIC_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1f3dd892",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Executor LLM: GPT-4o-mini through the OpenAI backend (passed to the optimizer as executor_llm).\n",
    "openai_config = OpenAILLMConfig(model=\"gpt-4o-mini\", openai_key=OPENAI_API_KEY)\n",
    "executor_llm = OpenAILLM(config=openai_config)\n",
    "\n",
    "# Optimizer LLM: the same GPT-4o-mini model, routed through LiteLLM.\n",
    "# NOTE: the variable keeps the name `claude_config` from the Claude setup shown\n",
    "# below, although it currently configures an OpenAI model.\n",
    "claude_config = LiteLLMConfig(model=\"gpt-4o-mini\", openai_key=OPENAI_API_KEY)\n",
    "optimizer_llm = LiteLLM(config=claude_config)\n",
    "\n",
    "# Alternative optimizer LLM (Claude 3.5 Sonnet), kept for reference:\n",
    "# claude_config = LiteLLMConfig(\n",
    "#     model=\"anthropic/claude-3-5-sonnet-20240620\",\n",
    "#     anthropic_key=ANTHROPIC_API_KEY\n",
    "# )\n",
    "# optimizer_llm = LiteLLM(config=claude_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a87feb08",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-benchmark optimizer settings: the question type being solved and the names of\n",
    "# the operators made available for that benchmark. One entry is unpacked as keyword\n",
    "# arguments when the optimizer is created.\n",
    "EXPERIMENTAL_CONFIG = dict(\n",
    "    humaneval=dict(\n",
    "        question_type=\"code\",\n",
    "        operators=[\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"],\n",
    "    ),\n",
    "    mbpp=dict(\n",
    "        question_type=\"code\",\n",
    "        operators=[\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"],\n",
    "    ),\n",
    "    hotpotqa=dict(\n",
    "        question_type=\"qa\",\n",
    "        operators=[\"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"],\n",
    "    ),\n",
    "    gsm8k=dict(\n",
    "        question_type=\"math\",\n",
    "        operators=[\"Custom\", \"ScEnsemble\", \"Programmer\"],\n",
    "    ),\n",
    "    math=dict(\n",
    "        question_type=\"math\",\n",
    "        operators=[\"Custom\", \"ScEnsemble\", \"Programmer\"],\n",
    "    ),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b6054068",
   "metadata": {},
   "outputs": [],
   "source": [
    "import evoagentx.workflow.operators as operator\n",
    "import examples.aflow.code_generation.prompt as prompt_custom # noqa: F401\n",
    "from evoagentx.models.model_configs import LLMConfig\n",
    "from evoagentx.benchmark.benchmark import Benchmark\n",
    "from evoagentx.models.model_utils import create_llm_instance\n",
    "\n",
    "class Workflow:\n",
    "    \"\"\"Seed workflow that asks an LLM operator for a Python solution to one problem.\"\"\"\n",
    "\n",
    "    def __init__(self, name: str, llm_config: LLMConfig, benchmark: Benchmark):\n",
    "        \"\"\"Create the LLM from ``llm_config`` and attach the operators.\n",
    "\n",
    "        Args:\n",
    "            name: Label stored on the workflow.\n",
    "            llm_config: Configuration handed to ``create_llm_instance``.\n",
    "            benchmark: Benchmark object stored on the instance; ``__call__`` does not read it.\n",
    "        \"\"\"\n",
    "        self.name = name\n",
    "        self.benchmark = benchmark\n",
    "        self.llm = create_llm_instance(llm_config)\n",
    "        # Both operators share the single LLM instance created above.\n",
    "        self.custom = operator.Custom(self.llm)\n",
    "        self.custom_code_generate = operator.CustomCodeGenerate(self.llm)\n",
    "\n",
    "    async def __call__(self, problem: str, entry_point: str):\n",
    "        \"\"\"Generate code for ``problem`` and return the ``'response'`` entry of the result.\n",
    "\n",
    "        Args:\n",
    "            problem: Problem statement forwarded to the code-generation operator.\n",
    "            entry_point: Entry-point name forwarded to the code-generation operator.\n",
    "\n",
    "        Returns:\n",
    "            Whatever the operator stored under the ``'response'`` key.\n",
    "        \"\"\"\n",
    "        # `self.custom` is the free-form operator, e.g.\n",
    "        #     await self.custom(input=..., instruction=\"...\")\n",
    "        # Standard code output goes through `custom_code_generate` instead.\n",
    "        generated = await self.custom_code_generate(\n",
    "            problem=problem,\n",
    "            entry_point=entry_point,\n",
    "            instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT,\n",
    "        )\n",
    "        return generated['response']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "27e574ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:15:13.430\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m182\u001b[0m - \u001b[1mLoading train data from None\u001b[0m\n",
      "\u001b[32m2025-10-12 15:15:13.431\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m185\u001b[0m - \u001b[1mLoading dev data from humaneval_validate.jsonl\u001b[0m\n",
      "\u001b[32m2025-10-12 15:15:13.432\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mevoagentx.benchmark.humaneval\u001b[0m:\u001b[36m_load_data\u001b[0m:\u001b[36m188\u001b[0m - \u001b[1mLoading test data from humaneval_test.jsonl\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Instantiate the HumanEval benchmark with its default arguments; the same object is\n",
    "# later handed to optimizer.optimize() and optimizer.test().\n",
    "humaneval = AFlowHumanEval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2f8da181",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2025-10-12 15:15:13.504\u001b[0m | \u001b[31m\u001b[1mERROR   \u001b[0m | \u001b[36mevoagentx.core.module\u001b[0m:\u001b[36m__init__\u001b[0m:\u001b[36m107\u001b[0m - \u001b[31m\u001b[1mCan not instantiate AFlowOptimizer from: {\n",
      "    \"graph_path\": \"./examples/aflow/code_generation\",\n",
      "    \"optimized_path\": \"./examples/aflow/humaneval/optimized\",\n",
      "    \"optimizer_llm\": \"<evoagentx.models.litellm_model.LiteLLM object at 0x1526934baf10>\",\n",
      "    \"executor_llm\": \"<evoagentx.models.openai_model.OpenAILLM object at 0x152693662dd0>\",\n",
      "    \"validation_rounds\": 3,\n",
      "    \"eval_rounds\": 3,\n",
      "    \"max_rounds\": 20,\n",
      "    \"question_type\": \"code\",\n",
      "    \"operators\": [\n",
      "        \"Custom\",\n",
      "        \"CustomCodeGenerate\",\n",
      "        \"Test\",\n",
      "        \"ScEnsemble\"\n",
      "    ]\n",
      "}\n",
      "\n",
      ">>>>>>>> 1 Exception Errors: <<<<<<<<\n",
      "\n",
      "FileNotFoundError: [Errno 2] No such file or directory: './examples/aflow/code_generation/graph.py'\u001b[0m\n"
     ]
    },
    {
     "ename": "FileNotFoundError",
     "evalue": "[Errno 2] No such file or directory: './examples/aflow/code_generation/graph.py'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mFileNotFoundError\u001b[39m                         Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m optimizer = \u001b[43mAFlowOptimizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m      2\u001b[39m \u001b[43m    \u001b[49m\u001b[43mgraph_path\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m./examples/aflow/code_generation\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Path to the initial workflow graph\u001b[39;49;00m\n\u001b[32m      3\u001b[39m \u001b[43m    \u001b[49m\u001b[43moptimized_path\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m./examples/aflow/humaneval/optimized\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Path to save optimized workflows\u001b[39;49;00m\n\u001b[32m      4\u001b[39m \u001b[43m    \u001b[49m\u001b[43moptimizer_llm\u001b[49m\u001b[43m=\u001b[49m\u001b[43moptimizer_llm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# LLM for optimization\u001b[39;49;00m\n\u001b[32m      5\u001b[39m \u001b[43m    \u001b[49m\u001b[43mexecutor_llm\u001b[49m\u001b[43m=\u001b[49m\u001b[43mexecutor_llm\u001b[49m\u001b[43m,\u001b[49m\u001b[43m    \u001b[49m\u001b[38;5;66;43;03m# LLM for execution\u001b[39;49;00m\n\u001b[32m      6\u001b[39m \u001b[43m    \u001b[49m\u001b[43mvalidation_rounds\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m          \u001b[49m\u001b[38;5;66;43;03m# Number of times to run validation on the development set during optimization\u001b[39;49;00m\n\u001b[32m      7\u001b[39m \u001b[43m    \u001b[49m\u001b[43meval_rounds\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m               \u001b[49m\u001b[38;5;66;43;03m# Number of times to run evaluation on the test set during testing\u001b[39;49;00m\n\u001b[32m      
8\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmax_rounds\u001b[49m\u001b[43m=\u001b[49m\u001b[32;43m20\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m               \u001b[49m\u001b[38;5;66;43;03m# Maximum optimization rounds\u001b[39;49;00m\n\u001b[32m      9\u001b[39m \u001b[43m    \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mEXPERIMENTAL_CONFIG\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhumaneval\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Task-specific configuration, used to specify the task type and available operators\u001b[39;49;00m\n\u001b[32m     10\u001b[39m \u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/core/module.py:98\u001b[39m, in \u001b[36mBaseModule.__init__\u001b[39m\u001b[34m(self, **kwargs)\u001b[39m\n\u001b[32m     93\u001b[39m         \u001b[38;5;66;03m# if field_value and isinstance(field_value, dict) and \"class_name\" in field_value:\u001b[39;00m\n\u001b[32m     94\u001b[39m         \u001b[38;5;66;03m#     class_name = field_value.get(\"class_name\")\u001b[39;00m\n\u001b[32m     95\u001b[39m         \u001b[38;5;66;03m#     sub_cls = MODULE_REGISTRY.get_module(cls_name=class_name)\u001b[39;00m\n\u001b[32m     96\u001b[39m         \u001b[38;5;66;03m#     kwargs[field_name] = sub_cls._create_instance(field_value)\u001b[39;00m\n\u001b[32m     97\u001b[39m     \u001b[38;5;28msuper\u001b[39m().\u001b[34m__init__\u001b[39m(**kwargs) \n\u001b[32m---> \u001b[39m\u001b[32m98\u001b[39m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43minit_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     99\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m (ValidationError, \u001b[38;5;167;01mException\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m    100\u001b[39m     exception_handler = callback_manager.get_callback(\u001b[33m\"\u001b[39m\u001b[33mexception_buffer\u001b[39m\u001b[33m\"\u001b[39m)\n",
      "\u001b[36mFile \u001b[39m\u001b[32m/gpfs/radev/pi/ying_rex/tl688/selfevolve/EvoAgentX/evoagentx/optimizers/aflow_optimizer.py:84\u001b[39m, in \u001b[36mAFlowOptimizer.init_module\u001b[39m\u001b[34m(self, **kwargs)\u001b[39m\n\u001b[32m     82\u001b[39m round_zero_path = os.path.join(\u001b[38;5;28mself\u001b[39m.root_path, \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mround_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.round\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m     83\u001b[39m os.makedirs(round_zero_path, exist_ok=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m---> \u001b[39m\u001b[32m84\u001b[39m \u001b[43mshutil\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcopy2\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mgraph_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgraph.py\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mos\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mround_zero_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mgraph.py\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     85\u001b[39m shutil.copy2(os.path.join(\u001b[38;5;28mself\u001b[39m.graph_path, \u001b[33m\"\u001b[39m\u001b[33mprompt.py\u001b[39m\u001b[33m\"\u001b[39m), os.path.join(round_zero_path, \u001b[33m\"\u001b[39m\u001b[33mprompt.py\u001b[39m\u001b[33m\"\u001b[39m))\n\u001b[32m     86\u001b[39m \u001b[38;5;28mself\u001b[39m.graph_utils.update_prompt_import(os.path.join(round_zero_path, 
\u001b[33m\"\u001b[39m\u001b[33mgraph.py\u001b[39m\u001b[33m\"\u001b[39m), round_zero_path)\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/.conda/envs/evoagentx/lib/python3.11/shutil.py:448\u001b[39m, in \u001b[36mcopy2\u001b[39m\u001b[34m(src, dst, follow_symlinks)\u001b[39m\n\u001b[32m    446\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m os.path.isdir(dst):\n\u001b[32m    447\u001b[39m     dst = os.path.join(dst, os.path.basename(src))\n\u001b[32m--> \u001b[39m\u001b[32m448\u001b[39m \u001b[43mcopyfile\u001b[49m\u001b[43m(\u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdst\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfollow_symlinks\u001b[49m\u001b[43m=\u001b[49m\u001b[43mfollow_symlinks\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    449\u001b[39m copystat(src, dst, follow_symlinks=follow_symlinks)\n\u001b[32m    450\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m dst\n",
      "\u001b[36mFile \u001b[39m\u001b[32m~/.conda/envs/evoagentx/lib/python3.11/shutil.py:256\u001b[39m, in \u001b[36mcopyfile\u001b[39m\u001b[34m(src, dst, follow_symlinks)\u001b[39m\n\u001b[32m    254\u001b[39m     os.symlink(os.readlink(src), dst)\n\u001b[32m    255\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m256\u001b[39m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msrc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mrb\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m fsrc:\n\u001b[32m    257\u001b[39m         \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m    258\u001b[39m             \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(dst, \u001b[33m'\u001b[39m\u001b[33mwb\u001b[39m\u001b[33m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m fdst:\n\u001b[32m    259\u001b[39m                 \u001b[38;5;66;03m# macOS\u001b[39;00m\n",
      "\u001b[31mFileNotFoundError\u001b[39m: [Errno 2] No such file or directory: './examples/aflow/code_generation/graph.py'"
     ]
    }
   ],
   "source": [
    "# Folder holding the seed workflow (graph.py and prompt.py). The relative path only\n",
    "# resolves when the kernel's working directory is the repository root; otherwise the\n",
    "# optimizer fails with FileNotFoundError while copying graph.py. In that case fall back\n",
    "# to the folder of the imported prompt module, where graph.py is expected to sit next\n",
    "# to prompt.py.\n",
    "GRAPH_PATH = \"./examples/aflow/code_generation\"\n",
    "if not os.path.isfile(os.path.join(GRAPH_PATH, \"graph.py\")):\n",
    "    GRAPH_PATH = os.path.dirname(os.path.abspath(prompt_custom.__file__))\n",
    "\n",
    "optimizer = AFlowOptimizer(\n",
    "    graph_path=GRAPH_PATH,  # Path to the initial workflow graph\n",
    "    optimized_path=\"./examples/aflow/humaneval/optimized\",  # Path to save optimized workflows (relative to the working directory)\n",
    "    optimizer_llm=optimizer_llm,  # LLM for optimization\n",
    "    executor_llm=executor_llm,    # LLM for execution\n",
    "    validation_rounds=3,          # Number of times to run validation on the development set during optimization\n",
    "    eval_rounds=3,               # Number of times to run evaluation on the test set during testing\n",
    "    max_rounds=20,               # Maximum optimization rounds\n",
    "    **EXPERIMENTAL_CONFIG[\"humaneval\"]  # Task-specific configuration, used to specify the task type and available operators\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74937699",
   "metadata": {},
   "outputs": [],
   "source": [
    "import nest_asyncio\n",
    "# Patch asyncio so that event loops can be nested; applied before the optimizer is run.\n",
    "# NOTE(review): presumably required because the notebook kernel already runs an event\n",
    "# loop -- confirm.\n",
    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "98ac4a63",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Run the workflow optimization against the HumanEval benchmark object, using the\n",
    "# optimizer instance created in an earlier cell.\n",
    "optimizer.optimize(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1010d583",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Run the optimizer's test step on the same HumanEval benchmark object.\n",
    "optimizer.test(humaneval)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "becb5a82",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "5c076d29",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# SciCode dev split in JSON Lines format. The default is the original cluster location;\n",
    "# set the SCICODE_DEV_PATH environment variable to point at the file on another machine.\n",
    "SCICODE_DEV_PATH = os.environ.get(\n",
    "    \"SCICODE_DEV_PATH\",\n",
    "    \"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\",\n",
    ")\n",
    "df = pd.read_json(SCICODE_DEV_PATH, lines=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "481602a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'def get_alpha(recvec, alpha_scaling=5):\\n    \"\"\"\\n    Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    \"\"\"\\n    alpha = alpha_scaling * np.max(np.linalg.norm(recvec, axis=1))\\n    return alpha'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reference implementation stored for the first record\n",
    "df['ground_truth_code'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "ffb0be7e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n    '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    '''\""
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Function signature and docstring stored for the first record\n",
    "df['function_header'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "69acf613",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'import numpy as np\\nfrom scipy.special import erfc'"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Import statements listed as dependencies for the first record\n",
    "df['required_dependencies'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b5696e0e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['step_number', 'step_description_prompt', 'step_background',\n",
       "       'ground_truth_code', 'function_header', 'test_cases', 'return_line',\n",
       "       'required_dependencies'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column names of the loaded SciCode frame\n",
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "0a3085a9",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"def get_alpha(recvec, alpha_scaling=5):\\n    '''Calculate the alpha value for the Ewald summation, scaled by a specified factor.\\n    Parameters:\\n        recvec (np.ndarray): A 3x3 array representing the reciprocal lattice vectors.\\n        alpha_scaling (float): A scaling factor applied to the alpha value. Default is 5.\\n    Returns:\\n        float: The calculated alpha value.\\n    '''\""
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Function header of the first record (same lookup as in an earlier cell)\n",
    "df['function_header'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "e6a76c86",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "\"ref1 = -1.74756\\nEX1 = {\\n    'latvec': np.array([\\n        [0.0, 1.0, 1.0],\\n        [1.0, 0.0, 1.0],\\n        [1.0, 1.0, 0.0]\\n        ]),\\n    'atom_charges': np.array([1]),\\n    'atom_coords': np.array([\\n        [0.0, 0.0, 0.0]\\n        ]),\\n    'configs': np.array([\\n        [1.0, 1.0, 1.0]\\n    ]),\\n}\\nassert np.allclose(get_alpha(np.linalg.inv(EX1['latvec']).T), target)\""
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Test-case source text stored for the first record\n",
    "df['test_cases'].iloc[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "153a9929",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e9168d74",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import pickle\n",
    "import glob\n",
    "import pandas as pd\n",
    "import glob\n",
    "from tqdm import tqdm\n",
    "import base64\n",
    "import requests\n",
    "# OpenAI API key: read it from a local .env file or the environment, and only\n",
    "# prompt for it when neither provides one. Never hardcode the secret in the\n",
    "# notebook -- a key that has been committed must be revoked and replaced.\n",
    "import os\n",
    "from getpass import getpass\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\") or getpass(\"OpenAI API key: \")\n",
    "os.environ[\"OPENAI_API_KEY\"] = api_key\n",
    "\n",
    "\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "from evoagentx.optimizers import AFlowOptimizer\n",
    "from evoagentx.models import LiteLLMConfig, LiteLLM, OpenAILLMConfig, OpenAILLM\n",
    "from evoagentx.benchmark import AFlowHumanEval\n",
    "\n",
    "# Merge any variables declared in a local .env file into the process environment,\n",
    "# then pick up the OpenAI credential (None when it is not set anywhere).\n",
    "load_dotenv()\n",
    "OPENAI_API_KEY = os.environ.get(\"OPENAI_API_KEY\")\n",
    "# Uncomment when the optimizer should use an Anthropic model instead:\n",
    "# ANTHROPIC_API_KEY = os.environ.get(\"ANTHROPIC_API_KEY\")\n",
    "\n",
    "# Executor LLM: GPT-4o-mini through the OpenAI backend (passed to the optimizer as executor_llm).\n",
    "openai_config = OpenAILLMConfig(model=\"gpt-4o-mini\", openai_key=OPENAI_API_KEY)\n",
    "executor_llm = OpenAILLM(config=openai_config)\n",
    "\n",
    "# Optimizer LLM: the same GPT-4o-mini model, routed through LiteLLM.\n",
    "# NOTE: the variable keeps the name `claude_config` from the Claude setup shown\n",
    "# below, although it currently configures an OpenAI model.\n",
    "claude_config = LiteLLMConfig(model=\"gpt-4o-mini\", openai_key=OPENAI_API_KEY)\n",
    "optimizer_llm = LiteLLM(config=claude_config)\n",
    "\n",
    "# Alternative optimizer LLM (Claude 3.5 Sonnet), kept for reference:\n",
    "# claude_config = LiteLLMConfig(\n",
    "#     model=\"anthropic/claude-3-5-sonnet-20240620\",\n",
    "#     anthropic_key=ANTHROPIC_API_KEY\n",
    "# )\n",
    "# optimizer_llm = LiteLLM(config=claude_config)\n",
    "\n",
    "# Per-benchmark optimizer settings: the question type being solved and the names of\n",
    "# the operators made available for that benchmark. One entry is unpacked as keyword\n",
    "# arguments when the optimizer is created.\n",
    "EXPERIMENTAL_CONFIG = dict(\n",
    "    humaneval=dict(\n",
    "        question_type=\"code\",\n",
    "        operators=[\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"],\n",
    "    ),\n",
    "    mbpp=dict(\n",
    "        question_type=\"code\",\n",
    "        operators=[\"Custom\", \"CustomCodeGenerate\", \"Test\", \"ScEnsemble\"],\n",
    "    ),\n",
    "    hotpotqa=dict(\n",
    "        question_type=\"qa\",\n",
    "        operators=[\"Custom\", \"AnswerGenerate\", \"QAScEnsemble\"],\n",
    "    ),\n",
    "    gsm8k=dict(\n",
    "        question_type=\"math\",\n",
    "        operators=[\"Custom\", \"ScEnsemble\", \"Programmer\"],\n",
    "    ),\n",
    "    math=dict(\n",
    "        question_type=\"math\",\n",
    "        operators=[\"Custom\", \"ScEnsemble\", \"Programmer\"],\n",
    "    ),\n",
    ")\n",
    "\n",
    "import evoagentx.workflow.operators as operator\n",
    "import examples.aflow.code_generation.prompt as prompt_custom # noqa: F401\n",
    "from evoagentx.models.model_configs import LLMConfig\n",
    "from evoagentx.benchmark.benchmark import Benchmark\n",
    "from evoagentx.models.model_utils import create_llm_instance\n",
    "\n",
    "class Workflow:\n",
    "    \"\"\"Seed workflow that asks an LLM operator for a Python solution to one problem.\"\"\"\n",
    "\n",
    "    def __init__(self, name: str, llm_config: LLMConfig, benchmark: Benchmark):\n",
    "        \"\"\"Create the LLM from ``llm_config`` and attach the operators.\n",
    "\n",
    "        Args:\n",
    "            name: Label stored on the workflow.\n",
    "            llm_config: Configuration handed to ``create_llm_instance``.\n",
    "            benchmark: Benchmark object stored on the instance; ``__call__`` does not read it.\n",
    "        \"\"\"\n",
    "        self.name = name\n",
    "        self.benchmark = benchmark\n",
    "        self.llm = create_llm_instance(llm_config)\n",
    "        # Both operators share the single LLM instance created above.\n",
    "        self.custom = operator.Custom(self.llm)\n",
    "        self.custom_code_generate = operator.CustomCodeGenerate(self.llm)\n",
    "\n",
    "    async def __call__(self, problem: str, entry_point: str):\n",
    "        \"\"\"Generate code for ``problem`` and return the ``'response'`` entry of the result.\n",
    "\n",
    "        Args:\n",
    "            problem: Problem statement forwarded to the code-generation operator.\n",
    "            entry_point: Entry-point name forwarded to the code-generation operator.\n",
    "\n",
    "        Returns:\n",
    "            Whatever the operator stored under the ``'response'`` key.\n",
    "        \"\"\"\n",
    "        # `self.custom` is the free-form operator, e.g.\n",
    "        #     await self.custom(input=..., instruction=\"...\")\n",
    "        # Standard code output goes through `custom_code_generate` instead.\n",
    "        generated = await self.custom_code_generate(\n",
    "            problem=problem,\n",
    "            entry_point=entry_point,\n",
    "            instruction=prompt_custom.GENERATE_PYTHON_CODE_PROMPT,\n",
    "        )\n",
    "        return generated['response']\n",
    "\n",
    "# Instantiate the HumanEval benchmark with its default arguments.\n",
    "humaneval = AFlowHumanEval()\n",
    "\n",
    "# Folder holding the seed workflow (graph.py and prompt.py). The relative path only\n",
    "# resolves when the kernel's working directory is the repository root; otherwise the\n",
    "# optimizer cannot find graph.py. In that case fall back to the folder of the imported\n",
    "# prompt module, where graph.py is expected to sit next to prompt.py.\n",
    "GRAPH_PATH = \"./examples/aflow/code_generation\"\n",
    "if not os.path.isfile(os.path.join(GRAPH_PATH, \"graph.py\")):\n",
    "    GRAPH_PATH = os.path.dirname(os.path.abspath(prompt_custom.__file__))\n",
    "\n",
    "optimizer = AFlowOptimizer(\n",
    "    graph_path=GRAPH_PATH,  # Path to the initial workflow graph\n",
    "    optimized_path=\"./examples/aflow/humaneval/optimized\",  # Path to save optimized workflows (relative to the working directory)\n",
    "    optimizer_llm=optimizer_llm,  # LLM for optimization\n",
    "    executor_llm=executor_llm,    # LLM for execution\n",
    "    validation_rounds=3,          # Number of times to run validation on the development set during optimization\n",
    "    eval_rounds=3,               # Number of times to run evaluation on the test set during testing\n",
    "    max_rounds=20,               # Maximum optimization rounds\n",
    "    **EXPERIMENTAL_CONFIG[\"humaneval\"]  # Task-specific configuration, used to specify the task type and available operators\n",
    ")\n",
    "\n",
    "import nest_asyncio\n",
    "# Patch asyncio so that event loops can be nested; applied before the optimizer is run.\n",
    "# NOTE(review): presumably required because the notebook kernel already runs an event\n",
    "# loop -- confirm.\n",
    "nest_asyncio.apply()\n",
    "\n",
    "# Run the workflow optimization against the HumanEval benchmark object.\n",
    "optimizer.optimize(humaneval)\n",
    "\n",
    "# Run the optimizer's test step on the same benchmark object.\n",
    "optimizer.test(humaneval)\n",
    "\n",
    "import pandas as pd\n",
    "\n",
    "# SciCode dev split in JSON Lines format. The default is the original cluster location;\n",
    "# set the SCICODE_DEV_PATH environment variable to point at the file on another machine.\n",
    "SCICODE_DEV_PATH = os.environ.get(\n",
    "    \"SCICODE_DEV_PATH\",\n",
    "    \"/home/tl688/pitl688/selfevolve/AFlow/data/datasets/scicode_dev.jsonl\",\n",
    ")\n",
    "df = pd.read_json(SCICODE_DEV_PATH, lines=True)\n",
    "\n",
    "# Lookups on the first record. Inside a single cell only the final expression is\n",
    "# rendered, so the intermediate lookups below produce no visible output.\n",
    "df['ground_truth_code'].values[0]\n",
    "\n",
    "df['function_header'].values[0]\n",
    "\n",
    "df['required_dependencies'].values[0]\n",
    "\n",
    "df.columns\n",
    "\n",
    "df['function_header'].values[0]\n",
    "\n",
    "df['test_cases'].values[0]\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}