import numpy as np
from dotenv import load_dotenv

from evoagentx.agents.agent_manager import AgentManager
from evoagentx.benchmark import HotPotQA
from evoagentx.core.callbacks import suppress_logger_info
from evoagentx.core.logging import logger
from evoagentx.evaluators import Evaluator
from evoagentx.models import OpenAILLM, OpenAILLMConfig
from evoagentx.optimizers import TextGradOptimizer
from evoagentx.prompts import StringTemplate
from evoagentx.workflow import SequentialWorkFlowGraph

load_dotenv()

# Split configuration: one seed and three subset sizes, kept together so a
# reader can tune them without touching the loader logic.
SPLIT_SEED = 42
N_TRAIN = 10
N_DEV = 40
N_TEST = 100


class HotPotQASplits(HotPotQA):
    """HotPotQA variant that carves small train/dev/test subsets out of the dev pool.

    After the parent loader has run, the examples held in ``self._dev_data``
    are shuffled with a fixed seed and cut into three consecutive,
    non-overlapping slices of ``N_TRAIN``, ``N_DEV`` and ``N_TEST`` examples.
    Whatever the parent stored in ``self._train_data`` / ``self._test_data``
    is overwritten.
    """

    def _load_data(self):
        """Load the parent data, then replace the splits with seeded subsets.

        The permutation comes from a private ``RandomState`` rather than from
        ``np.random.seed``: the resulting order is the same as seeding the
        global legacy RNG with ``SPLIT_SEED``, but NumPy's process-wide random
        state is left untouched for the rest of the notebook.

        NOTE(review): assumes the parent ``_load_data`` fills ``self._dev_data``
        with an indexable sequence -- confirm against ``HotPotQA``. If the pool
        holds fewer than ``N_TRAIN + N_DEV + N_TEST`` examples, the later
        slices are silently shorter (possibly empty).
        """
        # Let the base benchmark populate its raw data first.
        super()._load_data()

        # Every subset is drawn from the *dev* pool loaded by the parent.
        pool = self._dev_data

        # Local generator: reproducible shuffle without reseeding global state.
        rng = np.random.RandomState(SPLIT_SEED)
        order = rng.permutation(len(pool))

        # Consecutive slice boundaries over the shuffled indices.
        train_end = N_TRAIN
        dev_end = train_end + N_DEV
        test_end = dev_end + N_TEST

        self._train_data = [pool[idx] for idx in order[:train_end]]
        self._dev_data = [pool[idx] for idx in order[train_end:dev_end]]
        self._test_data = [pool[idx] for idx in order[dev_end:test_end]]
/gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_train_v1.1.json ...\u001b[0m\n", "\u001b[32m2025-12-19 20:25:17.815\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mevoagentx.benchmark.hotpotqa\u001b[0m:\u001b[36m_load_data_from_file\u001b[0m:\u001b[36m51\u001b[0m - \u001b[1mloading HotPotQA data from /gpfs/radev/home/tl688/.evoagentx/data/hotpotqa/hotpot_dev_distractor_v1.json ...\u001b[0m\n" ] } ], "source": [ "data = HotPotQASplits()" ] }, { "cell_type": "code", "execution_count": 5, "id": "3ce3c923", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Gesellschaft mit beschränkter Haftung'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data._train_data[0]['answer']" ] }, { "cell_type": "code", "execution_count": 8, "id": "da31d473", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"VIVA Media AG changed it's name in 2004. What does their new acronym stand for?\"" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data._train_data[0]['question']" ] }, { "cell_type": "code", "execution_count": 6, "id": "508051c8", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "{'_id': '5a7613c15542994ccc9186bf',\n", " 'answer': 'Gesellschaft mit beschränkter Haftung',\n", " 'question': \"VIVA Media AG changed it's name in 2004. 
What does their new acronym stand for?\",\n", " 'supporting_facts': [['VIVA Media', 0],\n", " ['Gesellschaft mit beschränkter Haftung', 0]],\n", " 'context': [['Constantin Medien',\n", " ['Constantin Medien AG (formerly EM.Entertainment and EM.TV & Merchandising AG, then EM.TV AG, and finally em.sport media ag) is a German media group, based in Ismaning near Munich, active in the area of sports, film and event marketing to medium-sized media companies.']],\n", " ['VIVA Poland',\n", " ['VIVA Polska (earlier \"VIVApolska!\")',\n", " ' is a Polish 24h music and entertainment channel from Viacom International Media Networks Polska.',\n", " ' The channel was officially launched on June 10, 2000 by the German VIVA Media AG.']],\n", " ['Viva (UK and Ireland)',\n", " ['Viva (stylised as VIVA) is a music television channel in the United Kingdom and Ireland, owned by VIVA Media and thereby Viacom International Media Networks Europe.',\n", " ' The channel launched on 26 October 2009, replacing TMF.']],\n", " ['Blic',\n", " ['Blic (Cyrillic: Блиц, ] ) is a daily middle-market tabloid newspaper in Serbia.',\n", " ' Founded in 1996, \"Blic\" is owned by Ringier Axel Springer Media AG, a joint venture between Ringier media corporation from Switzerland and Axel Springer AG from Germany.']],\n", " ['Qontis',\n", " ['Qontis is a Switzerland based online personal finance management (PFM) platform.',\n", " ' The service is part of a commercial enterprise between the \"Neue Zürcher Zeitung\" media property and e-banking solutions provider Crealogix.',\n", " ' The platform provides users with the ability to document and organize data from all instances of private income and expenditures.',\n", " \" Qontis' CEO (chief executive officer) is Christian Bieri, who formerly served as the Austrian Country Manager and CEE for the Vienna branch of Avaloq Evolution AG.\",\n", " \" The company's CMO (chief marketing officer) is Nils Reimelt, the former digital director at Ringier Axel Springer 
Media AG.\"]],\n", " ['VIVA Media',\n", " ['VIVA Media GmbH (until 2004 \"VIVA Media AG\") is a music television network originating from Germany.',\n", " ' It was founded for broadcast of VIVA Germany as VIVA Media AG in 1993 and has been owned by their original concurrent Viacom, the parent company of MTV, since 2004.',\n", " ' Viva channels exist in some European countries; the first spin-offs were launched in Poland and Switzerland in 2000.']],\n", " ['ProSiebenSat.1 Media',\n", " ['ProSiebenSat.1 Media SE (officially abbreviated as P7S1, formerly ProSiebenSat.1 Media AG) is a European mass media company, based in Germany.',\n", " ' It operates free-to-air commercial TV channels, pay TV channels, radio stations and related print businesses.',\n", " ' It was formed on October 2, 2000 by the merger of German TV broadcasters ProSieben Media AG (founded in 1989) and Sat.1 SatellitenFernsehen GmbH (founded in 1984 as PKS (Programmgesellschaft für Kabel- und Satellitenrundfunk)).',\n", " ' The company is listed on the Frankfurt Stock Exchange and is a component of the DAX index.']],\n", " ['Gesellschaft mit beschränkter Haftung',\n", " ['A Gesellschaft mit beschränkter Haftung (] , abbreviated GmbH ] and also GesmbH in Austria) is a type of legal entity very common in Germany, Austria, Switzerland (where it is equivalent to a S.à r.l.) 
and Liechtenstein.',\n", " ' In the United States, the equivalent type of entity is the limited liability company (LLC).',\n", " ' The name of the GmbH form emphasizes the fact that the owners (\"Gesellschafter\", also known as members) of the entity are not personally liable for the company\\'s debts.',\n", " ' \"GmbH\"s are considered legal persons under German and Austrian law.',\n", " ' Other variations include mbH (used when the term \"Gesellschaft\" is part of the company name itself), and gGmbH (\"gemeinnützige\" GmbH) for non-profit companies.']],\n", " ['Mix Megapol',\n", " ['Mix Megapol is a private Swedish radio network controlled by ProSiebenSat.1 Media AG.',\n", " ' It launched in 1993 under the name Skärgårdsradion (Archipelago Radio).',\n", " ' Later that year the name was changed to Radio Megapol when the broadcasting permissions were auctioned out.',\n", " ' In 1997 the word \"Mix\" was added and their slogan became \"The best mix of hits and oldies\".',\n", " ' Mix Megapol is on air in 24 cities from Kiruna in the north to Malmö in the south.',\n", " ' They have over two million listeners per week.',\n", " ' Their target group is people aged between 25 and 45.']],\n", " ['John M. Keller',\n", " ['John M. 
Keller (born March 5, 1938) is an American educational psychologist.',\n", " ' He is best known for his work on motivation in educational settings and in particular the ARCS model of instructional design.',\n", " ' The four elements of the acronym stand for Attention, Relevance, Confidence and Satisfaction (ARCS).']]],\n", " 'type': 'bridge',\n", " 'level': 'hard'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data._train_data[0]" ] }, { "cell_type": "code", "execution_count": null, "id": "4580a4b1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "id": "eacd5f0d", "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'adata' is not defined", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mNameError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[16]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43madata\u001b[49m \n", "\u001b[31mNameError\u001b[39m: name 'adata' is not defined" ] } ], "source": [] }, { "cell_type": "code", "execution_count": 34, "id": "687d8d33", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 35, "id": "45e010eb", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"/gpfs/radev/project/zhao/hs2234/evox/QA/gene_perturb/qa/old_data/adamson_psa_single_qa.csv\")" ] }, { "cell_type": "code", "execution_count": 36, "id": "f06467c8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"You are an expert in single-cell biology and functional genomics. In K562 cells, AARS is perturbed and the expression of ENSG00000092621 is measured. Does this perturbation cause a significant change in ENSG00000092621 expression? 
Answer strictly in the format 'Final Answer: Yes' or 'Final Answer: No'.\"" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['question'].values[0]" ] }, { "cell_type": "code", "execution_count": 37, "id": "275fd8da", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | question | \n", "answer | \n", "
|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "
| ... | \n", "... | \n", "... | \n", "
| 2575 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "
| 2576 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "
| 2577 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "
| 2578 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "
| 2579 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "
2580 rows × 2 columns
\n", "| \n", " | question | \n", "answer | \n", "
|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "
| ... | \n", "... | \n", "... | \n", "
| 2575 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "
| 2576 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "
| 2577 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "
| 2578 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "
| 2579 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "
2580 rows × 2 columns
\n", "| \n", " | question | \n", "answer | \n", "question_new | \n", "
|---|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "You are an expert in single-cell biology and f... | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "You are a domain expert in functional genomics... | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "You are assisting with the interpretation of p... | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "You are an expert in regulatory genomics. Cons... | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "You are a specialist in gene perturbation expe... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 2575 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "You are an expert in gene expression analysis.... | \n", "
| 2576 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "You are a genomics expert evaluating perturbat... | \n", "
| 2577 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "You are an expert analyst of perturbation data... | \n", "
| 2578 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "You are a functional genomics specialist. In K... | \n", "
| 2579 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "You are an expert in gene regulation studies. ... | \n", "
2580 rows × 3 columns
\n", "| \n", " | question | \n", "answer | \n", "
|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "
| ... | \n", "... | \n", "... | \n", "
| 3145 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "
| 3146 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "
| 3147 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "
| 3148 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "
| 3149 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "
3150 rows × 2 columns
\n", "| \n", " | question | \n", "answer | \n", "question_new | \n", "
|---|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "You are an expert in single-cell biology and f... | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "You are a domain expert in functional genomics... | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "You are assisting with the interpretation of p... | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "You are an expert in regulatory genomics. Cons... | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "You are a specialist in gene perturbation expe... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 3145 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "You are an expert in gene expression analysis.... | \n", "
| 3146 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "You are a genomics expert evaluating perturbat... | \n", "
| 3147 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "You are an expert analyst of perturbation data... | \n", "
| 3148 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "You are a functional genomics specialist. In K... | \n", "
| 3149 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "You are an expert in gene regulation studies. ... | \n", "
3150 rows × 3 columns
\n", "| \n", " | question | \n", "answer | \n", "
|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "
| ... | \n", "... | \n", "... | \n", "
| 62065 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "
| 62066 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "
| 62067 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "
| 62068 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "
| 62069 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "
62070 rows × 2 columns
\n", "| \n", " | question | \n", "answer | \n", "question_new | \n", "
|---|---|---|---|
| 0 | \n", "You are an expert in single-cell biology and f... | \n", "Yes | \n", "You are an expert in single-cell biology and f... | \n", "
| 1 | \n", "You are a domain expert in functional genomics... | \n", "Yes | \n", "You are a domain expert in functional genomics... | \n", "
| 2 | \n", "You are assisting with the interpretation of p... | \n", "Yes | \n", "You are assisting with the interpretation of p... | \n", "
| 3 | \n", "You are an expert in regulatory genomics. Cons... | \n", "Yes | \n", "You are an expert in regulatory genomics. Cons... | \n", "
| 4 | \n", "You are a specialist in gene perturbation expe... | \n", "Yes | \n", "You are a specialist in gene perturbation expe... | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "
| 62065 | \n", "You are an expert in gene expression analysis.... | \n", "Yes | \n", "You are an expert in gene expression analysis.... | \n", "
| 62066 | \n", "You are a genomics expert evaluating perturbat... | \n", "Yes | \n", "You are a genomics expert evaluating perturbat... | \n", "
| 62067 | \n", "You are an expert analyst of perturbation data... | \n", "Yes | \n", "You are an expert analyst of perturbation data... | \n", "
| 62068 | \n", "You are a functional genomics specialist. In K... | \n", "Yes | \n", "You are a functional genomics specialist. In K... | \n", "
| 62069 | \n", "You are an expert in gene regulation studies. ... | \n", "Yes | \n", "You are an expert in gene regulation studies. ... | \n", "
62070 rows × 3 columns
\n", "