26052024

- data/all_trades_profitability.parquet +2 -2
- data/fpmmTrades.parquet +2 -2
- data/requests.parquet +2 -2
- data/summary_profitability.parquet +2 -2
- data/t_map.pkl +2 -2
- scripts/tools.py +5 -8
- tabs/error.py +4 -4
- tabs/tool_win.py +1 -10
- test.ipynb +318 -90
data/all_trades_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:651c73abd6f2d68f12fa1b20363340c1ceff7652960fe4b47442b95865ef78ae
+size 8363176
data/fpmmTrades.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:db7352aa0dcf2ffd2f3c86a2edbcb13dca42c9d5089787a5a73399065a3e6444
+size 21257018
data/requests.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f89b9db573611cd096e6b17c909842690c3bd2f38f0763e4a809ccfe0ef718d6
+size 48251533
data/summary_profitability.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:f6b13394febf32397270399196772b87014367fd2131fe15d87deb53771b6f60
+size 52459
data/t_map.pkl
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:c73106e6ae68724a551c807e8a67d209878ecaf2badaae84307fd9ccb9c9cff9
+size 8126752
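Note: the five data files above are tracked with Git LFS, so each diff only touches the three-line pointer file (spec version, sha256 oid, byte size); the previous oid and size values are truncated in this view. A minimal sketch for checking a fetched file against its new pointer, using the values from the first diff above:

```python
import hashlib
from pathlib import Path

# After `git lfs pull`, the working-tree file's sha256 digest and byte size
# should match the oid and size lines of its LFS pointer.
path = Path("data/all_trades_profitability.parquet")
digest = hashlib.sha256(path.read_bytes()).hexdigest()
assert digest == "651c73abd6f2d68f12fa1b20363340c1ceff7652960fe4b47442b95865ef78ae"
assert path.stat().st_size == 8363176
```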
scripts/tools.py
CHANGED
@@ -267,19 +267,19 @@ class MechResponse:
             if isinstance(self.result, str):
                 kwargs = json.loads(self.result)
                 self.result = PredictionResponse(**kwargs)
-                self.error =
+                self.error = 0

         except JSONDecodeError:
             self.error_message = "Response parsing error"
-            self.error =
+            self.error = 1

         except Exception as e:
             self.error_message = str(e)
-            self.error =
+            self.error = 1

         else:
             self.error_message = "Invalid response from tool"
-            self.error =
+            self.error = 1
             self.result = None


@@ -616,6 +616,7 @@ def store_progress(
     tools: pd.DataFrame,
 ) -> None:
     """Store the given progress."""
+    print("starting")
     if filename:
         DATA_DIR.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists
         for event_name, content in event_to_contents.items():
@@ -623,8 +624,6 @@
             try:
                 if "result" in content.columns:
                     content = content.drop(columns=["result"])  # Avoid in-place modification
-                if 'error' in content.columns:
-                    content['error'] = content['error'].astype(bool)
                 content.to_parquet(DATA_DIR / event_filename, index=False)
             except Exception as e:
                 print(f"Failed to write {event_name}: {e}")
@@ -632,8 +631,6 @@
     try:
         if "result" in tools.columns:
             tools = tools.drop(columns=["result"])
-        if 'error' in tools.columns:
-            tools['error'] = tools['error'].astype(bool)
         tools.to_parquet(DATA_DIR / filename, index=False)
     except Exception as e:
         print(f"Failed to write tools data: {e}")
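Taken together, these hunks switch `MechResponse.error` to an integer flag (0 for a successfully parsed response, 1 for any failure) and drop the `astype(bool)` coercion in `store_progress`, so the flag is persisted as-is. A minimal sketch of the new semantics; the `PredictionResponse` stub and the surrounding dataclass layout are assumptions, not the repo's actual definitions:

```python
import json
from dataclasses import dataclass
from json import JSONDecodeError
from typing import Any, Optional


@dataclass
class PredictionResponse:  # hypothetical stub standing in for the real class
    p_yes: float = 0.0


@dataclass
class MechResponse:  # sketch of the post-change error semantics only
    result: Any = None
    error: int = 0  # integer flag now: 0 = parsed ok, 1 = failed
    error_message: Optional[str] = None

    def __post_init__(self) -> None:
        if isinstance(self.result, str):
            try:
                kwargs = json.loads(self.result)
                self.result = PredictionResponse(**kwargs)
                self.error = 0
            except JSONDecodeError:
                self.error_message = "Response parsing error"
                self.error = 1
            except Exception as e:
                self.error_message = str(e)
                self.error = 1
        else:
            self.error_message = "Invalid response from tool"
            self.error = 1
            self.result = None


assert MechResponse(result='{"p_yes": 0.7}').error == 0
assert MechResponse(result="not json").error == 1
```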
tabs/error.py
CHANGED
@@ -19,14 +19,14 @@ def get_error_data(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame
     tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]
     # tools_inc['error'] = tools_inc.apply(set_error, axis=1)
     error = tools_inc.groupby(['tool', 'request_month_year_week', 'error']).size().unstack().fillna(0).reset_index()
-    error['error_perc'] = (error[
-    error['total_requests'] = error[
+    error['error_perc'] = (error[1] / (error[0] + error[1])) * 100
+    error['total_requests'] = error[0] + error[1]
     return error

 def get_error_data_overall(error_df: pd.DataFrame) -> pd.DataFrame:
     """Gets the error data for the given tools and calculates the error percentage."""
-    error_total = error_df.groupby('request_month_year_week').agg({'total_requests': 'sum',
-    error_total['error_perc'] = (error_total[
+    error_total = error_df.groupby('request_month_year_week').agg({'total_requests': 'sum', 1: 'sum', 0: 'sum'}).reset_index()
+    error_total['error_perc'] = (error_total[1] / error_total['total_requests']) * 100
     error_total.columns = error_total.columns.astype(str)
     error_total['error_perc'] = error_total['error_perc'].apply(lambda x: round(x, 4))
     return error_total
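Because `error` now holds 0/1 integers, `unstack()` produces integer column labels, which is what the new `error[0]`/`error[1]` indexing relies on (and why `error_total.columns` is cast to `str` only after the arithmetic). A toy reproduction with made-up data:

```python
import pandas as pd

# Illustrative rows only; the real frame comes from tools.parquet.
tools_inc = pd.DataFrame({
    "tool": ["a", "a", "a", "b", "b"],
    "request_month_year_week": ["2024-21"] * 5,
    "error": [0, 0, 1, 0, 1],
})

# unstack() turns the 0/1 error values into integer-labelled columns.
error = (
    tools_inc.groupby(["tool", "request_month_year_week", "error"])
    .size()
    .unstack()
    .fillna(0)
    .reset_index()
)
error["error_perc"] = (error[1] / (error[0] + error[1])) * 100
error["total_requests"] = error[0] + error[1]
print(error)
# per-tool weekly counts land in columns 0 and 1, giving
# error_perc of 33.33 for tool 'a' and 50.0 for tool 'b'
```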
tabs/tool_win.py
CHANGED
@@ -7,20 +7,11 @@ HEIGHT=600
 WIDTH=1000


-# def set_error(row: pd.Series) -> bool:
-#     """Sets the error for the given row."""
-#     if row.error not in [True, False]:
-#         if not row.prompt_response:
-#             return True
-#         return False
-#     return row.error
-
-
 def get_tool_winning_rate(tools_df: pd.DataFrame, inc_tools: List[str]) -> pd.DataFrame:
     """Gets the tool winning rate data for the given tools and calculates the winning percentage."""
     tools_inc = tools_df[tools_df['tool'].isin(inc_tools)]
     # tools_inc['error'] = tools_inc.apply(set_error, axis=1)
-    tools_non_error = tools_inc[tools_inc['error'] !=
+    tools_non_error = tools_inc[tools_inc['error'] != 1]
     tools_non_error.loc[:, 'currentAnswer'] = tools_non_error['currentAnswer'].replace({'no': 'No', 'yes': 'Yes'})
     tools_non_error = tools_non_error[tools_non_error['currentAnswer'].isin(['Yes', 'No'])]
     tools_non_error = tools_non_error[tools_non_error['vote'].isin(['Yes', 'No'])]
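The commented-out `set_error` fallback is removed outright, and the non-error filter adopts the same 0/1 convention (`!= 1` rather than a boolean test). The filtering chain in isolation, on toy data:

```python
import pandas as pd

# Toy frame mixing an error row with inconsistent answer casing.
tools_inc = pd.DataFrame({
    "error": [0, 1, 0, 0],
    "currentAnswer": ["yes", "No", "no", "Yes"],
    "vote": ["Yes", "No", "No", "Maybe"],
})

tools_non_error = tools_inc[tools_inc["error"] != 1]
tools_non_error.loc[:, "currentAnswer"] = tools_non_error["currentAnswer"].replace({"no": "No", "yes": "Yes"})
tools_non_error = tools_non_error[tools_non_error["currentAnswer"].isin(["Yes", "No"])]
tools_non_error = tools_non_error[tools_non_error["vote"].isin(["Yes", "No"])]
print(tools_non_error)  # rows 0 and 2 survive; the error row and the 'Maybe' vote are dropped
```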
test.ipynb
CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -20,134 +20,362 @@
     "from enum import Enum\n",
     "from tqdm import tqdm\n",
     "import numpy as np\n",
-    "from pathlib import Path"
+    "from pathlib import Path\n",
+    "import pickle"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# trades = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/all_trades_profitability.parquet')\n",
+    "tools = pd.read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/tools.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools.groupby(['request_month_year_week', 'error']).size().unstack()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t_map = pickle.load(open('./data/t_map.pkl', 'rb'))\n",
+    "tools['request_time'] = tools['request_block'].map(t_map)\n",
+    "tools.to_parquet('./data/tools.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools['request_time'] = pd.to_datetime(tools['request_time'])\n",
+    "tools = tools[tools['request_time'] >= pd.to_datetime('2024-05-01')]\n",
+    "tools['request_block'].max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "requests = pd.read_parquet(\"./data/requests.parquet\")\n",
+    "delivers = pd.read_parquet(\"./data/delivers.parquet\")\n",
+    "print(requests.shape)\n",
+    "print(delivers.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "requests[requests['request_block'] <= 33714082].reset_index(drop=True).to_parquet(\"./data/requests.parquet\")\n",
+    "delivers[delivers['deliver_block'] <= 33714082].reset_index(drop=True).to_parquet(\"./data/delivers.parquet\")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "
-    "
-    "
-    "
-    " 'claude-prediction-offline', \n",
-    " 'prediction-offline-sme',\n",
-    " 'prediction-online-sme',\n",
-    " 'prediction-request-rag',\n",
-    " 'prediction-request-reasoning',\n",
-    " 'prediction-url-cot-claude', \n",
-    " 'prediction-request-rag-claude',\n",
-    " 'prediction-request-reasoning-claude'\n",
-    "]"
+    "import sys \n",
+    "\n",
+    "sys.path.append('./')\n",
+    "from scripts.tools import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:5: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
-      "  trades_df['month_year'] = trades_df['creation_timestamp'].dt.to_period('M').astype(str)\n",
-      "/var/folders/l_/g22b1g_n0gn4tmx9lkxqv5x00000gn/T/ipykernel_58769/3518445359.py:6: UserWarning: Converting to PeriodArray/Index representation will drop timezone information.\n",
-      "  trades_df['month_year_week'] = trades_df['creation_timestamp'].dt.to_period('W').astype(str)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "
-    " \"\"
-    "
-    "
-    "
-    "
-    "
+    "RPCs = [\n",
+    "    \"https://lb.nodies.app/v1/406d8dcc043f4cb3959ed7d6673d311a\",\n",
+    "]\n",
+    "w3s = [Web3(HTTPProvider(r)) for r in RPCs]\n",
+    "session = create_session()\n",
+    "event_to_transformer = {\n",
+    "    MechEventName.REQUEST: transform_request,\n",
+    "    MechEventName.DELIVER: transform_deliver,\n",
+    "}\n",
+    "mech_to_info = {\n",
+    "    to_checksum_address(address): (\n",
+    "        os.path.join(CONTRACTS_PATH, filename),\n",
+    "        earliest_block,\n",
+    "    )\n",
+    "    for address, (filename, earliest_block) in MECH_TO_INFO.items()\n",
+    "}\n",
+    "event_to_contents = {}\n",
     "\n",
-    "
-    "
-    " trades_df = pd.read_parquet(\"./data/all_trades_profitability.parquet\")\n",
+    "# latest_block = w3s[0].eth.get_block(LATEST_BLOCK_NAME)[BLOCK_DATA_NUMBER]\n",
+    "latest_block = 34032575\n",
     "\n",
-    "
-    " tools_df = tools_df[tools_df['request_time'].dt.year == 2024]\n",
+    "next_start_block = latest_block - 300\n",
     "\n",
-    "
-    "
+    "events_request = []\n",
+    "events_deliver = []\n",
+    "# Loop through events in event_to_transformer\n",
+    "for event_name, transformer in event_to_transformer.items():\n",
+    "    print(f\"Fetching {event_name.value} events\")\n",
+    "    for address, (abi, earliest_block) in mech_to_info.items():\n",
+    "        # parallelize the fetching of events\n",
+    "        with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
+    "            futures = []\n",
+    "            for i in range(\n",
+    "                next_start_block, latest_block, BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE\n",
+    "            ):\n",
+    "                futures.append(\n",
+    "                    executor.submit(\n",
+    "                        get_events,\n",
+    "                        random.choice(w3s),\n",
+    "                        event_name.value,\n",
+    "                        address,\n",
+    "                        abi,\n",
+    "                        i,\n",
+    "                        min(i + BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE, latest_block),\n",
+    "                    )\n",
+    "                )\n",
     "\n",
-    "
-    "
+    "            for future in tqdm(\n",
+    "                as_completed(futures),\n",
+    "                total=len(futures),\n",
+    "                desc=f\"Fetching {event_name.value} Events\",\n",
+    "            ):\n",
+    "                current_mech_events = future.result()\n",
+    "                if event_name == MechEventName.REQUEST:\n",
+    "                    events_request.extend(current_mech_events)\n",
+    "                elif event_name == MechEventName.DELIVER:\n",
+    "                    events_deliver.extend(current_mech_events)\n",
     "\n",
-    "
+    "    parsed_request = parse_events(events_request)\n",
+    "    parsed_deliver = parse_events(events_deliver)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['trader_address', 'trade_id', 'creation_timestamp', 'title',\n",
-       "       'market_status', 'collateral_amount', 'outcome_index',\n",
-       "       'trade_fee_amount', 'outcomes_tokens_traded', 'current_answer',\n",
-       "       'is_invalid', 'winning_trade', 'earnings', 'redeemed',\n",
-       "       'redeemed_amount', 'num_mech_calls', 'mech_fee_amount', 'net_earnings',\n",
-       "       'roi', 'month_year', 'month_year_week'],\n",
-       "      dtype='object')"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "
+    "contents_request = []\n",
+    "with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
+    "    futures = []\n",
+    "    for i in range(0, len(parsed_request), GET_CONTENTS_BATCH_SIZE):\n",
+    "        futures.append(\n",
+    "            executor.submit(\n",
+    "                get_contents,\n",
+    "                session,\n",
+    "                parsed_request[i : i + GET_CONTENTS_BATCH_SIZE],\n",
+    "                MechEventName.REQUEST,\n",
+    "            )\n",
+    "        )\n",
+    "\n",
+    "    for future in tqdm(\n",
+    "        as_completed(futures),\n",
+    "        total=len(futures),\n",
+    "        desc=f\"Fetching {event_name.value} Contents\",\n",
+    "    ):\n",
+    "        current_mech_contents = future.result()\n",
+    "        contents_request.append(current_mech_contents)\n",
+    "\n",
+    "contents_request = pd.concat(contents_request, ignore_index=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "
-    "
-    "
-    "
-    "
-    "
-    "
+    "contents_deliver = []\n",
+    "with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:\n",
+    "    futures = []\n",
+    "    for i in range(0, len(parsed_deliver), GET_CONTENTS_BATCH_SIZE):\n",
+    "        futures.append(\n",
+    "            executor.submit(\n",
+    "                get_contents,\n",
+    "                session,\n",
+    "                parsed_deliver[i : i + GET_CONTENTS_BATCH_SIZE],\n",
+    "                MechEventName.DELIVER,\n",
+    "            )\n",
+    "        )\n",
+    "\n",
+    "    for future in tqdm(\n",
+    "        as_completed(futures),\n",
+    "        total=len(futures),\n",
+    "        desc=f\"Fetching {event_name.value} Contents\",\n",
+    "    ):\n",
+    "        current_mech_contents = future.result()\n",
+    "        contents_deliver.append(current_mech_contents)\n",
     "\n",
-    "
-
-
-
-
-
-
+    "contents_deliver = pd.concat(contents_deliver, ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "full_contents = True\n",
+    "transformed_request = event_to_transformer[MechEventName.REQUEST](contents_request)\n",
+    "transformed_deliver = event_to_transformer[MechEventName.DELIVER](contents_deliver, full_contents=full_contents)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "
-
-
-
-
-
-
+    "transformed_request.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_deliver.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tools = pd.merge(transformed_request, transformed_deliver, on=REQUEST_ID_FIELD)\n",
+    "tools.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def store_progress(\n",
+    "    filename: str,\n",
+    "    event_to_contents: Dict[str, pd.DataFrame],\n",
+    "    tools: pd.DataFrame,\n",
+    ") -> None:\n",
+    "    \"\"\"Store the given progress.\"\"\"\n",
+    "    if filename:\n",
+    "        DATA_DIR.mkdir(parents=True, exist_ok=True)  # Ensure the directory exists\n",
+    "        for event_name, content in event_to_contents.items():\n",
+    "            event_filename = gen_event_filename(event_name)  # Ensure this function returns a valid filename string\n",
+    "            try:\n",
+    "                if \"result\" in content.columns:\n",
+    "                    content = content.drop(columns=[\"result\"])  # Avoid in-place modification\n",
+    "                if 'error' in content.columns:\n",
+    "                    content['error'] = content['error'].astype(bool)\n",
+    "                content.to_parquet(DATA_DIR / event_filename, index=False)\n",
+    "            except Exception as e:\n",
+    "                print(f\"Failed to write {event_name}: {e}\")\n",
+    "    try:\n",
+    "        if \"result\" in tools.columns:\n",
+    "            tools = tools.drop(columns=[\"result\"])\n",
+    "        if 'error' in tools.columns:\n",
+    "            tools['error'] = tools['error'].astype(bool)\n",
+    "        tools.to_parquet(DATA_DIR / filename, index=False)\n",
+    "    except Exception as e:\n",
+    "        print(f\"Failed to write tools data: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# store_progress(filename, event_to_contents, tools)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if 'result' in transformed_deliver.columns:\n",
+    "    transformed_deliver = transformed_deliver.drop(columns=['result'])\n",
+    "if 'error' in transformed_deliver.columns:\n",
+    "    transformed_deliver['error'] = transformed_deliver['error'].astype(bool)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_deliver.to_parquet(\"transformed_deliver.parquet\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "d = pd.read_parquet(\"transformed_deliver.parquet\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### duck db"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import duckdb\n",
+    "from datetime import datetime, timedelta\n",
+    "\n",
+    "# Calculate the date for two months ago\n",
+    "two_months_ago = (datetime.now() - timedelta(days=60)).strftime('%Y-%m-%d')\n",
+    "\n",
+    "# Connect to an in-memory DuckDB instance\n",
+    "con = duckdb.connect(':memory:')\n",
+    "\n",
+    "# Perform a SQL query to select data from the past two months directly from the Parquet file\n",
+    "query = f\"\"\"\n",
+    "SELECT *\n",
+    "FROM read_parquet('/Users/arshath/play/openautonomy/olas-prediction-live-dashboard_old/data/tools.parquet')\n",
+    "WHERE request_time >= '{two_months_ago}'\n",
+    "\"\"\"\n",
+    "\n",
+    "# Fetch the result as a pandas DataFrame\n",
+    "df = con.execute(query).fetchdf()\n",
+    "\n",
+    "# Close the connection\n",
+    "con.close()\n",
+    "\n",
+    "# Print the DataFrame\n",
+    "print(df)"
    ]
   },
   {
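The notebook's fetch loop partitions the block span into windows of `BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE` blocks and clamps the last window at `latest_block`, with each window submitted as one `get_events` future so the ranges can be fetched concurrently. The chunking arithmetic in isolation, with illustrative constants (the repo's real values live in scripts/tools.py):

```python
# Stand-ins for the notebook's constants; values are illustrative only.
BLOCKS_CHUNK_SIZE = 50
SNAPSHOT_RATE = 2
latest_block = 34032575          # hard-coded in the notebook
next_start_block = latest_block - 300

step = BLOCKS_CHUNK_SIZE * SNAPSHOT_RATE
chunks = [
    (start, min(start + step, latest_block))
    for start in range(next_start_block, latest_block, step)
]
print(chunks)
# [(34032275, 34032375), (34032375, 34032475), (34032475, 34032575)]
```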