HarpreetK
/

AIMO3

Model card Files Files and versions

xet

Community

HarpreetK committed on Apr 22

Commit

773c534

verified ·

1 Parent(s): 03ec278

Upload gpt_oss_nostepback24.ipynb

Browse files

Files changed (1) hide show

gpt_oss_nostepback24.ipynb +1717 -0

gpt_oss_nostepback24.ipynb ADDED Viewed

	@@ -0,0 +1,1717 @@

+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "id": "7a9b41cb",
+      "metadata": {
+        "editable": true,
+        "papermill": {
+          "duration": 0.004165,
+          "end_time": "2026-04-12T05:01:44.648318+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:01:44.644153+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "7a9b41cb"
+      },
+      "source": [
+        "\n",
+        "# Setup The Environment"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "73e5d3dc",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:01:44.656112Z",
+          "iopub.status.busy": "2026-04-12T05:01:44.655548Z",
+          "iopub.status.idle": "2026-04-12T05:01:44.660512Z",
+          "shell.execute_reply": "2026-04-12T05:01:44.660110Z"
+        },
+        "papermill": {
+          "duration": 0.009642,
+          "end_time": "2026-04-12T05:01:44.661341+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:01:44.651699+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "73e5d3dc"
+      },
+      "outputs": [],
+      "source": [
+        "# Track Overall Time\n",
+        "import time\n",
+        "global_deadline = time.perf_counter() + 5*3600\n",
+        "global_remaining = global_deadline - time.perf_counter()\n",
+        "cutoff_duration = global_remaining - 350\n",
+        "def get_global_remaining():\n",
+        "    return max(0, global_deadline - time.perf_counter())"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "5f9da4be",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:01:44.669660Z",
+          "iopub.status.busy": "2026-04-12T05:01:44.669504Z",
+          "iopub.status.idle": "2026-04-12T05:01:44.672401Z",
+          "shell.execute_reply": "2026-04-12T05:01:44.672047Z"
+        },
+        "papermill": {
+          "duration": 0.007624,
+          "end_time": "2026-04-12T05:01:44.673227+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:01:44.665603+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "5f9da4be"
+      },
+      "outputs": [],
+      "source": [
+        "import os, sys\n",
+        "\n",
+        "# Expose the bundled packages to this kernel (sys.path) and to child\n",
+        "# processes, which inherit PYTHONPATH from os.environ.\n",
+        "original_pythonpath = os.environ.get(\"PYTHONPATH\", \"\")\n",
+        "path1 = '/kaggle/input/datasets/hpkaur34/gptoss/Gpt-oss'\n",
+        "path2 = '/kaggle/usr/lib/notebooks/hpkaur34/install_utility_nemo_run/'\n",
+        "new_paths = f\"{path1}:{path2}\"\n",
+        "# Fixed: the fallback used the undefined name `new_path`, which raised\n",
+        "# NameError whenever PYTHONPATH was unset or empty.\n",
+        "merged_pythonpath = f\"{new_paths}:{original_pythonpath}\" if original_pythonpath else new_paths\n",
+        "os.environ[\"PYTHONPATH\"] = merged_pythonpath\n",
+        "sys.path.append(path1)\n",
+        "sys.path.append(path2)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "66edb1b7",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:01:44.680830Z",
+          "iopub.status.busy": "2026-04-12T05:01:44.680652Z",
+          "iopub.status.idle": "2026-04-12T05:05:01.636413Z",
+          "shell.execute_reply": "2026-04-12T05:05:01.635973Z"
+        },
+        "papermill": {
+          "duration": 196.960959,
+          "end_time": "2026-04-12T05:05:01.637603+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:01:44.676644+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "66edb1b7"
+      },
+      "outputs": [],
+      "source": [
+        "import subprocess\n",
+        "def set_env(input_archive, temp_dir):\n",
+        "    \"\"\"Extract the offline wheel archive (once) and pip-install from it.\n",
+        "\n",
+        "    Args:\n",
+        "        input_archive: path to the .tar.gz archive; pip looks for wheels in\n",
+        "            the `wheels/` folder under the extraction directory.\n",
+        "        temp_dir: extraction directory; extraction is skipped if it exists.\n",
+        "\n",
+        "    Raises:\n",
+        "        subprocess.CalledProcessError: if `tar` fails (it runs with check=True).\n",
+        "\n",
+        "    The pip step stays best-effort and never raises, but a non-zero exit\n",
+        "    code is printed so a broken install is visible in the cell output.\n",
+        "    \"\"\"\n",
+        "    if not os.path.exists(temp_dir):\n",
+        "        os.makedirs(temp_dir, exist_ok=True)\n",
+        "        subprocess.run(['tar', '-xzf', input_archive, '-C', temp_dir], check=True)\n",
+        "    result = subprocess.run([\n",
+        "        sys.executable,\n",
+        "        '-m',\n",
+        "        'pip',\n",
+        "        'install',\n",
+        "        '--no-index',\n",
+        "        '--find-links',\n",
+        "        f'{temp_dir}/wheels',\n",
+        "        'paramiko',\n",
+        "        'math_verify',\n",
+        "        'litellm',\n",
+        "        'flashinfer-python',\n",
+        "        'vllm==0.11.2',\n",
+        "        'openai_harmony',\n",
+        "    ], check=False)\n",
+        "    # check=False is kept on purpose (best-effort), but do not hide the failure.\n",
+        "    if result.returncode != 0:\n",
+        "        print(f\"⚠️ pip install exited with code {result.returncode}; some packages may be missing\")\n",
+        "\n",
+        "try:\n",
+        "    set_env(\n",
+        "        input_archive='/kaggle/usr/lib/notebooks/hpkaur34/aimo_utility_copy/wheels.tar.gz',\n",
+        "        temp_dir='/kaggle/tmp/setup'\n",
+        "    )\n",
+        "except Exception as e:\n",
+        "    print(f\"⚠️ set_env failed: {e}\")\n",
+        "    print(\"Continuing execution...\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "cd9b33b0",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:01.648387Z",
+          "iopub.status.busy": "2026-04-12T05:05:01.648226Z",
+          "iopub.status.idle": "2026-04-12T05:05:05.396043Z",
+          "shell.execute_reply": "2026-04-12T05:05:05.395545Z"
+        },
+        "papermill": {
+          "duration": 3.754774,
+          "end_time": "2026-04-12T05:05:05.397566+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:01.642792+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "cd9b33b0"
+      },
+      "outputs": [],
+      "source": [
+        "import os\n",
+        "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
+        "import torch"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "162dc6d1",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:05.408255Z",
+          "iopub.status.busy": "2026-04-12T05:05:05.408020Z",
+          "iopub.status.idle": "2026-04-12T05:05:05.411789Z",
+          "shell.execute_reply": "2026-04-12T05:05:05.411435Z"
+        },
+        "papermill": {
+          "duration": 0.009988,
+          "end_time": "2026-04-12T05:05:05.412633+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:05.402645+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "162dc6d1"
+      },
+      "outputs": [],
+      "source": [
+        "\"\"\"\n",
+        "import logging\n",
+        "logging.basicConfig(level=logging.DEBUG)\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ddfb0193",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:05.422698Z",
+          "iopub.status.busy": "2026-04-12T05:05:05.422543Z",
+          "iopub.status.idle": "2026-04-12T05:05:07.847229Z",
+          "shell.execute_reply": "2026-04-12T05:05:07.846480Z"
+        },
+        "papermill": {
+          "duration": 2.431309,
+          "end_time": "2026-04-12T05:05:07.848706+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:05.417397+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "ddfb0193"
+      },
+      "outputs": [],
+      "source": [
+        "import asyncio\n",
+        "import torch\n",
+        "import subprocess\n",
+        "import warnings\n",
+        "import glob\n",
+        "import kaggle_evaluation.aimo_3_inference_server\n",
+        "import pandas as pd\n",
+        "import traceback\n",
+        "import nest_asyncio\n",
+        "import httpx\n",
+        "import re\n",
+        "import time\n",
+        "import copy\n",
+        "import json\n",
+        "import requests\n",
+        "import pandas as pd\n",
+        "import polars as pl\n",
+        "from collections import Counter\n",
+        "from typing import List\n",
+        "import secrets\n",
+        "pd.set_option('display.max_colwidth', None)\n",
+        "warnings.filterwarnings(\"ignore\", category=SyntaxWarning)\n",
+        "nest_asyncio.apply()\n",
+        "os.environ[\"TORCH_COMPILE_DISABLE\"] = \"1\"\n",
+        "os.environ[\"TORCHDYNAMO_DISABLE\"] = \"1\"\n",
+        "os.environ['TRANSFORMERS_NO_FLAX'] = '1'\n",
+        "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
+        "os.environ['TOKENIZERS_PARALLELISM'] = 'false'\n",
+        "os.environ['TRITON_PTXAS_PATH'] = '/usr/local/cuda/bin/ptxas'\n",
+        "os.environ['TIKTOKEN_RS_CACHE_DIR']= \"/kaggle/input/datasets/hpkaur34/harmony-encoding\"\n",
+        "os.environ[\"TORCH_CUDA_ARCH_LIST\"] = '9.0'\n",
+        "#os.environ[\"VLLM_USE_FLASHINFER_SAMPLER\"]= \"1\"\n",
+        "# Below will change in kaggle\n",
+        "from collections import Counter, defaultdict"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "aad4e792",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:07.859567Z",
+          "iopub.status.busy": "2026-04-12T05:05:07.859290Z",
+          "iopub.status.idle": "2026-04-12T05:05:07.861900Z",
+          "shell.execute_reply": "2026-04-12T05:05:07.861517Z"
+        },
+        "papermill": {
+          "duration": 0.008918,
+          "end_time": "2026-04-12T05:05:07.862720+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:07.853802+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "aad4e792"
+      },
+      "outputs": [],
+      "source": [
+        "# This will change in kaggle\n",
+        "os.environ[\"TORCHINDUCTOR_CACHE_DIR\"] = \"torch_cache\"\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "3a837b51",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:07.873064Z",
+          "iopub.status.busy": "2026-04-12T05:05:07.872913Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.870524Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.870030Z"
+        },
+        "papermill": {
+          "duration": 46.004343,
+          "end_time": "2026-04-12T05:05:53.871905+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:07.867562+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "3a837b51"
+      },
+      "outputs": [],
+      "source": [
+        "from nemo_skills.code_execution.sandbox import get_sandbox\n",
+        "from nemo_skills.inference.model import get_code_execution_model\n",
+        "from nemo_skills.prompt.utils import get_prompt\n",
+        "from nemo_skills.inference.model import get_model"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "b69b4ab8",
+      "metadata": {
+        "papermill": {
+          "duration": 0.005047,
+          "end_time": "2026-04-12T05:05:53.882182+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.877135+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "b69b4ab8"
+      },
+      "source": [
+        "# Configuration Parameters"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e29f99c0",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.892569Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.892197Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.895885Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.895524Z"
+        },
+        "papermill": {
+          "duration": 0.009849,
+          "end_time": "2026-04-12T05:05:53.896584+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.886735+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "e29f99c0"
+      },
+      "outputs": [],
+      "source": [
+        "host = \"127.0.0.1\"\n",
+        "port = 5000\n",
+        "tp_size = 1\n",
+        "max_public = 10\n",
+        "max_tokens = 80000\n",
+        "max_input_tokens = 1800\n",
+        "tokens_to_generate =  78200 - 10\n",
+        "max_batch_size = 8\n",
+        "timeout_seconds = 300\n",
+        "global_buffer = 350\n",
+        "finish_at_last_n = 2\n",
+        "max_code_output_characters = 1100\n",
+        "code_execution_timeout = 5\n",
+        "max_code_executions = 125\n",
+        "g_score = 0\n",
+        "g_count = 0\n",
+        "prompt_score = Counter()\n",
+        "sampling_params = {\n",
+        "    \"tokens_to_generate\": tokens_to_generate,\n",
+        "    \"temperature\": 1, # 0.2,\n",
+        "    \"top_p\": 1,\n",
+        "}\n",
+        "\n",
+        "thoughts = [\"\"] * 50\n",
+        "thoughts = thoughts[:max_batch_size]\n",
+        "i = 0"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "78bd61b1",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.906656Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.906492Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.908783Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.908391Z"
+        },
+        "papermill": {
+          "duration": 0.008556,
+          "end_time": "2026-04-12T05:05:53.909622+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.901066+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "78bd61b1"
+      },
+      "outputs": [],
+      "source": [
+        "model_path = \"/kaggle/input/models/hpkaur34/gpt-oss-120b/transformers/default/1\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "f15f7036",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.919963Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.919804Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.922192Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.921752Z"
+        },
+        "papermill": {
+          "duration": 0.008918,
+          "end_time": "2026-04-12T05:05:53.923346+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.914428+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "f15f7036"
+      },
+      "outputs": [],
+      "source": [
+        "import json"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "848f046a",
+      "metadata": {
+        "papermill": {
+          "duration": 0.004638,
+          "end_time": "2026-04-12T05:05:53.932771+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.928133+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "848f046a"
+      },
+      "source": [
+        "# Start Server - Load Model & Sandbox"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a36cbdd9",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.943233Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.943062Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.946339Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.945949Z"
+        },
+        "papermill": {
+          "duration": 0.010016,
+          "end_time": "2026-04-12T05:05:53.947523+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.937507+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "a36cbdd9"
+      },
+      "outputs": [],
+      "source": [
+        "server_started = False\n",
+        "def load_model():\n",
+        "    \"\"\"Start the vLLM server through nemo_skills as a background process.\n",
+        "\n",
+        "    stdout and stderr of the child are written to vllm.log in the working\n",
+        "    directory. Returns the subprocess.Popen handle of the launcher.\n",
+        "    \"\"\"\n",
+        "    cmd = [\n",
+        "        \"python\",\n",
+        "        \"-m\",\n",
+        "        \"nemo_skills.inference.server.serve_vllm\",\n",
+        "        f\"--model={model_path}\",\n",
+        "        \"--port=5000\",\n",
+        "        \"--num_gpus=1\",\n",
+        "        \"--max_model_len=80000\",\n",
+        "        \"--max_num_batched_tokens=65000\",\n",
+        "        \"--max_num_seqs=13\",\n",
+        "        \"--max-cudagraph-capture-size=2048\",\n",
+        "        \"--gpu_memory_utilization=0.96\",\n",
+        "        \"--kv_cache_dtype=fp8_e4m3\",\n",
+        "        \"--stream-interval=200\",\n",
+        "        \"--enable-prefix-caching\",\n",
+        "        # One argv element per option: the previous \"--uvicorn-log-level debug\"\n",
+        "        # embedded a space in a single token, which only parses if the wrapper\n",
+        "        # re-joins arguments unquoted. The `=` form is a single unambiguous token.\n",
+        "        \"--uvicorn-log-level=debug\",\n",
+        "        \"--enable-log-requests\",\n",
+        "        \"--enable-log-outputs\",\n",
+        "        \"--async-scheduling\",\n",
+        "         ]\n",
+        "\n",
+        "    log_file = open(\"vllm.log\", \"w\")\n",
+        "    vllm_server = subprocess.Popen(\n",
+        "      cmd,\n",
+        "      stdout=log_file,\n",
+        "      stderr=log_file,\n",
+        "      text=True,\n",
+        "      bufsize=1  # line-buffered\n",
+        "    )\n",
+        "    return vllm_server"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "8824bf5d",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.957943Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.957786Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.960536Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.960150Z"
+        },
+        "papermill": {
+          "duration": 0.008917,
+          "end_time": "2026-04-12T05:05:53.961372+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.952455+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "8824bf5d"
+      },
+      "outputs": [],
+      "source": [
+        "vllm_server=load_model()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "563c5247",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.971887Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.971705Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.974875Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.974510Z"
+        },
+        "papermill": {
+          "duration": 0.009755,
+          "end_time": "2026-04-12T05:05:53.976064+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.966309+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "563c5247"
+      },
+      "outputs": [],
+      "source": [
+        "def wait_for_server(url=f\"http://{host}:{port}\", timeout=1200):\n",
+        "    \"\"\"Poll `<url>/docs` about once per second until it answers HTTP 200.\n",
+        "\n",
+        "    Args:\n",
+        "        url: base URL of the server (default is built from `host` and `port`\n",
+        "            at definition time).\n",
+        "        timeout: overall budget in seconds before giving up.\n",
+        "\n",
+        "    Returns:\n",
+        "        True once the endpoint responds with status 200.\n",
+        "\n",
+        "    Raises:\n",
+        "        TimeoutError: if the server is not ready within `timeout` seconds.\n",
+        "    \"\"\"\n",
+        "    start = time.perf_counter()\n",
+        "    while True:\n",
+        "        try:\n",
+        "            # Per-request timeout: without it a stalled connection could block\n",
+        "            # forever and the overall `timeout` check below would never run.\n",
+        "            r = requests.get(f\"{url}/docs\", timeout=5)\n",
+        "            if r.status_code == 200:\n",
+        "                print(\"✅ Server is ready\",time.perf_counter()-start)\n",
+        "                return True\n",
+        "        except Exception:\n",
+        "            # Connection refused / timed out while the server is still loading.\n",
+        "            pass\n",
+        "\n",
+        "        if time.perf_counter() - start > timeout:\n",
+        "            raise TimeoutError(\"Server did not start in time\")\n",
+        "\n",
+        "        time.sleep(1)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "33c6222d",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.986603Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.986295Z",
+          "iopub.status.idle": "2026-04-12T05:05:53.988966Z",
+          "shell.execute_reply": "2026-04-12T05:05:53.988576Z"
+        },
+        "papermill": {
+          "duration": 0.009033,
+          "end_time": "2026-04-12T05:05:53.989770+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.980737+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "33c6222d"
+      },
+      "outputs": [],
+      "source": [
+        "def sandbox_server():\n",
+        "    \"\"\"Launch the nemo_skills local sandbox server in the background.\n",
+        "\n",
+        "    Child output goes to sandbox.log. Returns the subprocess.Popen handle so\n",
+        "    the caller can poll or terminate the sandbox; callers that ignore the\n",
+        "    return value behave as before.\n",
+        "    \"\"\"\n",
+        "    log_file = open(\"sandbox.log\", \"w\")\n",
+        "    sandbox_process = subprocess.Popen(\n",
+        "        [\"python\", \"-m\", \"nemo_skills.code_execution.local_sandbox.local_sandbox_server\"],\n",
+        "                stdout=log_file,\n",
+        "                stderr=log_file,\n",
+        "                text=True,\n",
+        "                bufsize=1)\n",
+        "\n",
+        "    # Fixed pause, presumably to let the server come up before first use.\n",
+        "    time.sleep(3)\n",
+        "    return sandbox_process"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "75ac913f",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:53.999737Z",
+          "iopub.status.busy": "2026-04-12T05:05:53.999550Z",
+          "iopub.status.idle": "2026-04-12T05:05:59.047417Z",
+          "shell.execute_reply": "2026-04-12T05:05:59.046670Z"
+        },
+        "papermill": {
+          "duration": 5.054699,
+          "end_time": "2026-04-12T05:05:59.048988+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:53.994289+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "75ac913f"
+      },
+      "outputs": [],
+      "source": [
+        "time.sleep(2)\n",
+        "sandbox_server()\n",
+        "sandbox = get_sandbox()  # localhost by default"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "0f199401",
+      "metadata": {
+        "papermill": {
+          "duration": 0.004869,
+          "end_time": "2026-04-12T05:05:59.059135+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:59.054266+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "0f199401"
+      },
+      "source": [
+        "# Prompt Types and Updating Prompt"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ba16a0bb",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:59.070015Z",
+          "iopub.status.busy": "2026-04-12T05:05:59.069827Z",
+          "iopub.status.idle": "2026-04-12T05:05:59.073193Z",
+          "shell.execute_reply": "2026-04-12T05:05:59.072795Z"
+        },
+        "papermill": {
+          "duration": 0.009911,
+          "end_time": "2026-04-12T05:05:59.074008+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:59.064097+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "ba16a0bb"
+      },
+      "outputs": [],
+      "source": [
+        "# System prompt: solving guidance followed by the required answer format.\n",
+        "# Adjacent literals are concatenated, so every piece must end with its own\n",
+        "# separator (space or newline).\n",
+        "default_prompt = (\n",
+        "        'You are an elite mathematical problem solver with expertise at the International '\n",
+        "        'Mathematical Olympiad (IMO) level. Your goal is to find the correct answer through '\n",
+        "        'rigorous mathematical reasoning.\\n\\n'\n",
+        "\n",
+        "        '# Problem-Solving Approach:\\n'\n",
+        "        '1. UNDERSTAND: Carefully read and rephrase the problem in your own words. '\n",
+        "        'Identify what is given, what needs to be found, and any constraints.\\n'\n",
+        "        '2. EXPLORE: Consider multiple solution strategies. Think about relevant theorems, '\n",
+        "        'techniques, patterns, or analogous problems. Don\\'t commit to one approach immediately.\\n'\n",
+        "        '3. PLAN: Select the most promising approach and outline key steps before executing.\\n'\n",
+        "        '4. EXECUTE: Work through your solution methodically. Show all reasoning steps clearly.\\n'\n",
+        "        '5. VERIFY: Check your answer by substituting back, testing edge cases, or using '\n",
+        "        'alternative methods. Ensure logical consistency throughout.\\n\\n'\n",
+        "\n",
+        "        '# Mathematical Reasoning Principles:\\n'\n",
+        "        '- Break complex problems into smaller, manageable sub-problems\\n'\n",
+        "        '- Look for patterns, symmetries, and special cases that provide insight\\n'\n",
+        "        '- Use concrete examples to build intuition before generalizing\\n'\n",
+        "        '- Consider extreme cases and boundary conditions\\n'\n",
+        "        '- If stuck, try working backwards from the desired result\\n'\n",
+        "        '- Be willing to restart with a different approach if needed\\n\\n'\n",
+        "\n",
+        "        '# Verification Requirements:\\n'\n",
+        "        '- Cross-check arithmetic and algebraic manipulations\\n'\n",
+        "        '- Verify that your solution satisfies all problem constraints\\n'\n",
+        "        '- Test your answer with simple cases or special values when possible\\n'\n",
+        "        '- Ensure dimensional consistency and reasonableness of the result\\n\\n'\n",
+        "\n",
+        "        \"#RESPONSE FORMAT:\\n\\n\"\n",
+        "        \"The final answer must be a non-negative integer.\\nInstead of the \\\\boxed{} format use json format. Follow the instructions for the format-\"\n",
+        "        ' \"Answer\": <non-negative integer>,\"Confidence\": <number between 0 and 1>\\n'\n",
+        "        \"Do not output any additional reasoning after this JSON.\\n\"\n",
+        "        \"Do not output any additional reasoning after this JSON.\\n\"\n",
+        "    )\n",
+        ""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e61b75fe",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:05:59.084817Z",
+          "iopub.status.busy": "2026-04-12T05:05:59.084430Z",
+          "iopub.status.idle": "2026-04-12T05:06:00.071004Z",
+          "shell.execute_reply": "2026-04-12T05:06:00.070468Z"
+        },
+        "papermill": {
+          "duration": 0.993574,
+          "end_time": "2026-04-12T05:06:00.072353+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:05:59.078779+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "e61b75fe"
+      },
+      "outputs": [],
+      "source": [
+        "# Below will change\n",
+        "system_message='{system_prompt}'\n",
+        "prompt_template = get_prompt(prompt_config='gpt-oss/math',system_message=system_message,tokenizer=model_path,code_tags=\"gpt-oss\")\n",
+        "chat_template_kwargs = {\n",
+        "    \"builtin_tools\": [\"python\"],\n",
+        "    \"reasoning_effort\":\"high\"\n",
+        "\n",
+        "}"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "95a2110d",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:00.083120Z",
+          "iopub.status.busy": "2026-04-12T05:06:00.082940Z",
+          "iopub.status.idle": "2026-04-12T05:06:00.085743Z",
+          "shell.execute_reply": "2026-04-12T05:06:00.085363Z"
+        },
+        "papermill": {
+          "duration": 0.009146,
+          "end_time": "2026-04-12T05:06:00.086560+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.077414+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "95a2110d"
+      },
+      "outputs": [],
+      "source": [
+        "def safe_concat(a, b,function_name):\n",
+        "    if a is None or b is None:\n",
+        "        raise ValueError(f\"Cannot concatenate: a={a}, b={b}, Error Raised from function {function_name}\")\n",
+        "    return a + b"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "21d172dc",
+      "metadata": {
+        "papermill": {
+          "duration": 0.004736,
+          "end_time": "2026-04-12T05:06:00.096017+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.091281+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "21d172dc"
+      },
+      "source": [
+        "# Data Extraction & Early Stopping"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e27ba473",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:00.106688Z",
+          "iopub.status.busy": "2026-04-12T05:06:00.106508Z",
+          "iopub.status.idle": "2026-04-12T05:06:00.113188Z",
+          "shell.execute_reply": "2026-04-12T05:06:00.112798Z"
+        },
+        "papermill": {
+          "duration": 0.013124,
+          "end_time": "2026-04-12T05:06:00.114007+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.100883+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "e27ba473"
+      },
+      "outputs": [],
+      "source": [
+        "class Result:\n",
+        "    def __init__(self):\n",
+        "        self.early_stop_flag = False\n",
+        "    def best_voted_answer(self):\n",
+        "        return self.best_answer\n",
+        "\n",
+        "    def majority_voting(self, answer_list):\n",
+        "        count = defaultdict(float)\n",
+        "        # Keep raw list separate; filter into valid_answers\n",
+        "        self.answer_list = answer_list\n",
+        "        self.valid_answers = [x[\"Answer\"] for x in self.answer_list if x[\"Answer\"] != -1]\n",
+        "        print(\"Answer_list after popping -1\", self.valid_answers, \"%%%%\")\n",
+        "\n",
+        "        # BUG FIX: set fallback when all answers are invalid\n",
+        "        if len(self.valid_answers) == 0:\n",
+        "            self.best_answer = None\n",
+        "            self.best_count = 0\n",
+        "            self.second_count = 0\n",
+        "            self.sorted_answers = []\n",
+        "            return\n",
+        "\n",
+        "        for a in self.valid_answers:\n",
+        "            count[a] += 1\n",
+        "        self.sorted_answers = sorted(count.items(), key=lambda x: x[1], reverse=True)\n",
+        "\n",
+        "        self.best_answer, self.best_count = self.sorted_answers[0]\n",
+        "        self.second_count = self.sorted_answers[1][1] if len(self.sorted_answers) > 1 else 0\n",
+        "\n",
+        "        if (\n",
+        "            self.best_count == 1\n",
+        "            and self.best_answer == 0\n",
+        "            and len(self.sorted_answers) > 1\n",
+        "            and self.sorted_answers[1] is not None\n",
+        "        ):\n",
+        "\n",
+        "            self.best_answer, self.best_count = self.sorted_answers[1]\n",
+        "\n",
+        "\n",
+        "    def early_stop(self, answer_list, num_done):\n",
+        "        \"\"\"Re-run the majority vote and decide whether sampling can stop early.\n",
+        "\n",
+        "        Records ``num_done``, refreshes the vote through ``majority_voting`` and\n",
+        "        sets ``early_stop_flag`` to True once the leading answer has at least\n",
+        "        3 votes and its count exceeds the runner-up count by at least 1.\n",
+        "\n",
+        "        Returns False when there is no valid answer, otherwise the current\n",
+        "        value of ``self.early_stop_flag``.\n",
+        "        \"\"\"\n",
+        "        print(\"Num_done is\",num_done)\n",
+        "        self.num_done = num_done\n",
+        "        self.majority_voting(answer_list)\n",
+        "\n",
+        "        valid_total = len(self.valid_answers)\n",
+        "        top_votes = self.best_count\n",
+        "        lead = top_votes - self.second_count\n",
+        "        print(f\"Num done: {self.num_done}, Valid answers: {valid_total}, \"\n",
+        "              f\"Best count: {top_votes}, Second count: {self.second_count}\")\n",
+        "\n",
+        "        # Nothing usable yet: never stop on an empty vote.\n",
+        "        if not valid_total:\n",
+        "            return False\n",
+        "\n",
+        "        if top_votes >= 3 and lead >= 1:\n",
+        "            self.early_stop_flag = True\n",
+        "            print(f\">>> EARLY STOP at {self.num_done} completions | \"\n",
+        "                  f\"best={self.best_answer} (count={top_votes}, gap={lead})\")\n",
+        "\n",
+        "        return self.early_stop_flag\n",
+        "\n",
+        "    def get_best_answer(self,answer_list, num_done, flag):\n",
+        "        \"\"\"Vote on ``answer_list`` and return ``(best_voted_answer(), early_stop_flag)``.\n",
+        "\n",
+        "        A truthy ``flag`` routes the vote through ``early_stop`` (which may set\n",
+        "        ``early_stop_flag``); otherwise only ``majority_voting`` is run.\n",
+        "        \"\"\"\n",
+        "        if flag:\n",
+        "            self.early_stop(answer_list, num_done)\n",
+        "        else:\n",
+        "            self.majority_voting(answer_list)\n",
+        "\n",
+        "        voted = self.best_voted_answer()\n",
+        "        return voted, self.early_stop_flag\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "08da144a",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:00.124730Z",
+          "iopub.status.busy": "2026-04-12T05:06:00.124556Z",
+          "iopub.status.idle": "2026-04-12T05:06:00.130593Z",
+          "shell.execute_reply": "2026-04-12T05:06:00.130188Z"
+        },
+        "papermill": {
+          "duration": 0.012614,
+          "end_time": "2026-04-12T05:06:00.131471+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.118857+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "08da144a"
+      },
+      "outputs": [],
+      "source": [
+        "import re, requests\n",
+        "\n",
+        "class Answer:\n",
+        "    \"\"\"Second-pass answer extractor.\n",
+        "\n",
+        "    Sends a finished generation back to the model with the ``extract_answer``\n",
+        "    prompt and returns the resulting answer dict.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self):\n",
+        "        # Voting-style bookkeeping fields; nothing inside this class reads them.\n",
+        "        self.best_answer = None\n",
+        "        self.input_message  = \"\"\n",
+        "        self.best_count = 0\n",
+        "        self.second_count = 0\n",
+        "        self.answer_list = []\n",
+        "        self.early_stop_flag = False\n",
+        "        self.sorted_answers = []\n",
+        "        self.valid_answers = []\n",
+        "        # Sampling settings forwarded verbatim to server_obj.generate_response.\n",
+        "        self.sampling_param = {\n",
+        "            \"tokens_to_generate\": 7000,\n",
+        "            \"temperature\": 0.9,\n",
+        "            \"top_p\": 0.95,\n",
+        "        }\n",
+        "        self.timeout = httpx.Timeout(\n",
+        "            connect=60.0,\n",
+        "            read=300.0,\n",
+        "            write=60.0,\n",
+        "            pool=120.0,\n",
+        "        )\n",
+        "\n",
+        "    @staticmethod\n",
+        "    def _failed_result():\n",
+        "        \"\"\"Return a fresh failure dict; -1 is the answer value the voting code filters out.\"\"\"\n",
+        "        return {\"Answer\": -1, \"Confidence\": -0.1}\n",
+        "\n",
+        "    def clean_messages(self, text):\n",
+        "        \"\"\"Strip every ``<|...|>`` special-token marker from ``text`` and trim whitespace.\"\"\"\n",
+        "        cleaned = re.sub(r'<\\|[^|]*\\|>', '', text)\n",
+        "        return cleaned.strip()\n",
+        "\n",
+        "    async def extract_answer(self, question, model_output):\n",
+        "        \"\"\"Ask the model to extract a final answer from ``model_output``.\n",
+        "\n",
+        "        Returns the dict produced by ``server_obj.generate_response`` when it is\n",
+        "        a dict containing an ``\"Answer\"`` key. In every other case (unexpected\n",
+        "        payload, or any exception while building the prompt or calling the\n",
+        "        server) returns ``{\"Answer\": -1, \"Confidence\": -0.1}``; it never raises\n",
+        "        an ``Exception`` subclass.\n",
+        "        \"\"\"\n",
+        "        try:\n",
+        "            seed = secrets.randbits(32)\n",
+        "            input_message = self.clean_messages(model_output)\n",
+        "            rid = secrets.token_hex(8)\n",
+        "            message = prompt_template.fill(\n",
+        "                input_dict={\n",
+        "                    \"problem\": safe_concat(question,input_message,\"extract_answer\"),\n",
+        "                    \"system_prompt\": promptobj.get_dprompt(\"extract_answer\"),\n",
+        "                },\n",
+        "                chat_template_kwargs = chat_template_kwargs,\n",
+        "                format_as_string=True\n",
+        "            )\n",
+        "            # The second element (a completion-token count, judging by the original\n",
+        "            # variable name) is not needed here.\n",
+        "            data, _completion_tokens = await server_obj.generate_response(\n",
+        "                prompt=message,\n",
+        "                random_seed=seed,\n",
+        "                stream=True,\n",
+        "                calling_function = \"extract_answer\",\n",
+        "                extra_body={\"request_id\": rid, \"reasoning_effort\":\"medium\"},\n",
+        "                timeout = self.timeout,\n",
+        "                **self.sampling_param,\n",
+        "            )\n",
+        "        except Exception as e:\n",
+        "            print(f\"[extract_answer failed] {type(e).__name__}: {e}\")\n",
+        "            return self._failed_result()\n",
+        "\n",
+        "        # The voting code indexes every result with [\"Answer\"], so reject payloads\n",
+        "        # that are not dicts or that lack that key instead of passing them on.\n",
+        "        if isinstance(data, dict) and \"Answer\" in data:\n",
+        "            return data\n",
+        "        return self._failed_result()\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "32920345",
+      "metadata": {
+        "papermill": {
+          "duration": 0.004946,
+          "end_time": "2026-04-12T05:06:00.141342+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.136396+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "32920345"
+      },
+      "source": [
+        "# Inference"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "88e12926",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:00.152565Z",
+          "iopub.status.busy": "2026-04-12T05:06:00.151995Z",
+          "iopub.status.idle": "2026-04-12T05:06:00.977284Z",
+          "shell.execute_reply": "2026-04-12T05:06:00.976809Z"
+        },
+        "papermill": {
+          "duration": 0.832359,
+          "end_time": "2026-04-12T05:06:00.978691+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.146332+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "88e12926"
+      },
+      "outputs": [],
+      "source": [
+        "# Shared model client: the generation and answer-extraction calls in this notebook\n",
+        "# go through `server_obj`. It targets a vLLM server at the base_url given below.\n",
+        "# NOTE: these connection settings are expected to change when running on Kaggle.\n",
+        "# `model_path`, `sandbox` and the three code-execution limits are not defined in this\n",
+        "# cell (presumably in earlier cells - confirm before running this cell on its own).\n",
+        "server_obj = get_code_execution_model(server_type = 'vllm',\n",
+        "                                model=model_path,\n",
+        "                               base_url=\"http://127.0.0.1:5000/v1\",\n",
+        "                               api_key='EMPTY',\n",
+        "                               sandbox=sandbox,\n",
+        "                               code_execution={\n",
+        "                                'max_code_output_characters': max_code_output_characters,\n",
+        "                                'code_execution_timeout': code_execution_timeout,\n",
+        "                                'max_code_executions': max_code_executions,\n",
+        "                               })\n",
+        "\n",
+        "async def abort_request(request_ids: str | list[str]):\n",
+        "    \"\"\"Best-effort, one-at-a-time DELETE of server-side requests.\n",
+        "\n",
+        "    Accepts a single request id or a list of ids. Each DELETE uses short\n",
+        "    timeouts and any failure is ignored, so a slow or unreachable server\n",
+        "    cannot stall the caller for long.\n",
+        "    \"\"\"\n",
+        "    rids = [request_ids] if isinstance(request_ids, str) else request_ids\n",
+        "    short_timeout = httpx.Timeout(connect=1.0, read=2.0, write=1.0, pool=1.0)\n",
+        "\n",
+        "    async with httpx.AsyncClient(timeout=short_timeout) as http:\n",
+        "        for rid in rids:\n",
+        "            try:\n",
+        "                await http.delete(f\"http://{host}:{port}/v1/requests/{rid}\")\n",
+        "            except Exception:\n",
+        "                # Deliberate: aborting is advisory, so errors are dropped.\n",
+        "                pass\n",
+        "            # Brief pause so other coroutines get to run between aborts.\n",
+        "            await asyncio.sleep(0.05)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "1134eef5",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:00.990214Z",
+          "iopub.status.busy": "2026-04-12T05:06:00.990023Z",
+          "iopub.status.idle": "2026-04-12T05:06:01.002419Z",
+          "shell.execute_reply": "2026-04-12T05:06:01.002010Z"
+        },
+        "papermill": {
+          "duration": 0.019361,
+          "end_time": "2026-04-12T05:06:01.003298+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:00.983937+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "1134eef5"
+      },
+      "outputs": [],
+      "source": [
+        "class ClientClass:\n",
+        "    \"\"\"Fan one question out as parallel generations and stream back answer dicts.\n",
+        "\n",
+        "    Prompts are prepared by ``flatten_prompt_list`` / ``generate_question_copies``\n",
+        "    and consumed by ``send_request_to_server``. ``self.tasks`` is never reset,\n",
+        "    so use one instance per question.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Strong references to fire-and-forget abort tasks. The event loop only keeps\n",
+        "    # weak references to tasks, so an unreferenced task can be garbage-collected\n",
+        "    # before it finishes.\n",
+        "    _background_tasks = set()\n",
+        "\n",
+        "    def __init__(self, prompt):\n",
+        "        global sampling_params\n",
+        "        self.thresh_hold = 3                         # minimum completions before checking early stop\n",
+        "        self.system_prompt = prompt\n",
+        "        self.answer = {}\n",
+        "        self.randomseed_list = []\n",
+        "        self.num_done = 0\n",
+        "        # Private copy so per-instance changes never leak into the global settings.\n",
+        "        self.sampling_param = copy.deepcopy(sampling_params)\n",
+        "        self.question = \"\"\n",
+        "        self.finished_generations = []\n",
+        "        self.final_answer = None\n",
+        "        self.early_stop_flag = False\n",
+        "        self.flattened_prompt_list = []\n",
+        "        self.list_of_questions = []\n",
+        "        self.answer_list = []\n",
+        "        self.request_ids = []  # per-task IDs for server-side abort\n",
+        "        self.tasks = []\n",
+        "        self.res = None  # result of the last successfully finished task seen during cleanup\n",
+        "        self.timeout = httpx.Timeout(\n",
+        "            connect=30.0,\n",
+        "            read= 500.0 ,\n",
+        "            write=30.0,\n",
+        "            pool=120.0,\n",
+        "        )\n",
+        "        self.answerobj = Answer()\n",
+        "\n",
+        "    async def send_request_to_server(self):\n",
+        "        \"\"\"Async generator: start one generation per prepared prompt, yield one answer dict per completion.\n",
+        "\n",
+        "        For each finished generation the pre-parsed ``result[\"answer\"]`` JSON is\n",
+        "        used when present, otherwise ``Answer.extract_answer`` is asked to recover\n",
+        "        one. Any failure for a single generation yields\n",
+        "        ``{\"Answer\": -1, \"Confidence\": -0.1}`` instead of raising.\n",
+        "\n",
+        "        On exit (exhaustion, ``aclose()`` or cancellation) unfinished tasks are\n",
+        "        cancelled and a background server-side abort is scheduled for all request ids.\n",
+        "        \"\"\"\n",
+        "        print(\"Request sent\")\n",
+        "        self.request_ids = [secrets.token_hex(8) for _ in self.list_of_questions]\n",
+        "        self.randomseed_list = list(range(len(self.list_of_questions)))\n",
+        "        for prompt, seed, rid in zip(self.list_of_questions, self.randomseed_list, self.request_ids):\n",
+        "            task = asyncio.create_task(\n",
+        "                server_obj.generate_async(\n",
+        "                    prompt=prompt,\n",
+        "                    random_seed=seed,\n",
+        "                    timeout=self.timeout,\n",
+        "                    remove_stop_phrases=False,\n",
+        "                    stream = True,\n",
+        "                    extra_body={\"request_id\": rid,\"enable_thinking\":True,\"reasoning_effort\":\"high\"},\n",
+        "                    **prompt_template.get_code_execution_args(),\n",
+        "                    **self.sampling_param,\n",
+        "                )\n",
+        "            )\n",
+        "            self.tasks.append(task)\n",
+        "\n",
+        "        try:\n",
+        "            # Plain iteration over as_completed yields fresh awaitables, not the\n",
+        "            # original Task objects, so they cannot be used to mark tasks as handled.\n",
+        "            for completed in asyncio.as_completed(self.tasks):\n",
+        "                try:\n",
+        "                    result = await completed\n",
+        "                    self.num_done += 1\n",
+        "                    self.finished_generations.append(result[\"generation\"])\n",
+        "                    if result[\"answer\"] is not None:\n",
+        "                        parsed = json.loads(result[\"answer\"])\n",
+        "                        # The voting code indexes each item with [\"Answer\"]; treat any\n",
+        "                        # other payload as a failed generation.\n",
+        "                        if not (isinstance(parsed, dict) and \"Answer\" in parsed):\n",
+        "                            raise ValueError(f\"unexpected answer payload: {parsed!r}\")\n",
+        "                        self.answer = parsed\n",
+        "                        print(\"The answer and confidence after json parsing\", self.answer)\n",
+        "                    else:\n",
+        "                        self.answer = await self.answerobj.extract_answer(self.question, result[\"generation\"])\n",
+        "                        print(\"The answer and confidence after interaction with 2nd model\",self.answer)\n",
+        "                    yield self.answer\n",
+        "                except GeneratorExit:\n",
+        "                    # The consumer closed the generator; cleanup happens in `finally`.\n",
+        "                    return\n",
+        "                except Exception as e:\n",
+        "                    print(f\"[ERROR] {type(e).__name__}\")\n",
+        "                    traceback.print_exc()\n",
+        "                    self.answer = {\n",
+        "                        \"Answer\": -1,\n",
+        "                        \"Confidence\": -0.1,\n",
+        "                    }\n",
+        "                    yield self.answer\n",
+        "\n",
+        "        finally:\n",
+        "            for t in self.tasks:\n",
+        "                if not t.done():\n",
+        "                    t.cancel()\n",
+        "                elif not t.cancelled():\n",
+        "                    # Check cancelled() first: exception()/result() raise CancelledError\n",
+        "                    # (a BaseException) on a cancelled task, which would abort this cleanup.\n",
+        "                    # Reading exception() also marks a failure as retrieved.\n",
+        "                    if t.exception() is None:\n",
+        "                        self.res = t.result()\n",
+        "\n",
+        "            # Fire the server-side abort independently of this generator, keeping a\n",
+        "            # strong reference until the task is done.\n",
+        "            abort_task = asyncio.create_task(abort_request(self.request_ids))\n",
+        "            ClientClass._background_tasks.add(abort_task)\n",
+        "            abort_task.add_done_callback(ClientClass._background_tasks.discard)\n",
+        "\n",
+        "    def flatten_prompt_list(self):\n",
+        "        \"\"\"Fill ``flattened_prompt_list`` with ``max_batch_size`` copies of the system prompt.\"\"\"\n",
+        "        global max_batch_size\n",
+        "        self.flattened_prompt_list = [\n",
+        "            self.system_prompt\n",
+        "            for _ in range(max_batch_size)\n",
+        "        ]\n",
+        "\n",
+        "    def generate_question_copies(self, question):\n",
+        "        \"\"\"Store ``question`` and render one full prompt string per entry of ``flattened_prompt_list``.\"\"\"\n",
+        "        self.question = question\n",
+        "        self.list_of_questions = [\n",
+        "            prompt_template.fill(\n",
+        "                input_dict={\n",
+        "                    \"problem\": question,\n",
+        "                    \"system_prompt\": system_prompt,\n",
+        "                },\n",
+        "                chat_template_kwargs = chat_template_kwargs,\n",
+        "                format_as_string=True\n",
+        "            )\n",
+        "            for system_prompt in self.flattened_prompt_list\n",
+        "        ]\n",
+        "\n",
+        "\n",
+        "    async def predict_for_question(self, question):\n",
+        "        \"\"\"Async generator: prepare the prompts for ``question`` and relay every answer dict.\n",
+        "\n",
+        "        Exceptions from the inner generator are logged and re-raised; the inner\n",
+        "        generator is always closed on exit so its cleanup runs.\n",
+        "        \"\"\"\n",
+        "        self.flatten_prompt_list()\n",
+        "        self.generate_question_copies(question)\n",
+        "\n",
+        "        gen = self.send_request_to_server()\n",
+        "\n",
+        "        try:\n",
+        "            async for answer in gen:\n",
+        "                yield answer\n",
+        "\n",
+        "        except Exception as e:\n",
+        "            print(\"Error in predict_for_question:\", e)\n",
+        "            raise\n",
+        "\n",
+        "        finally:\n",
+        "            try:\n",
+        "                await gen.aclose()\n",
+        "            except Exception:\n",
+        "                # Closing is best-effort; a failure here must not mask the original outcome.\n",
+        "                pass"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "5c553f96",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:01.014551Z",
+          "iopub.status.busy": "2026-04-12T05:06:01.014122Z",
+          "iopub.status.idle": "2026-04-12T05:06:01.019371Z",
+          "shell.execute_reply": "2026-04-12T05:06:01.018984Z"
+        },
+        "papermill": {
+          "duration": 0.011845,
+          "end_time": "2026-04-12T05:06:01.020211+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:01.008366+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "5c553f96"
+      },
+      "outputs": [],
+      "source": [
+        "import math\n",
+        "\n",
+        "class BufferBorrower:\n",
+        "    \"\"\"\n",
+        "    Time-pressure based borrowing from a shared buffer of spare seconds.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    b_max : float\n",
+        "        Stored on the instance; not used by the current allocation logic (default 0.85).\n",
+        "    k : float\n",
+        "        Stored on the instance; not used by the current allocation logic (default 6.0).\n",
+        "    threshold : float\n",
+        "        Stored on the instance; not used by the current allocation logic (default 0.4).\n",
+        "    total_questions : int\n",
+        "        Number of questions the whole run is paced against (default 50).\n",
+        "    total_available_time : int\n",
+        "        Time available for the whole run, in the same unit as ``remaining_time`` (default 15720).\n",
+        "    \"\"\"\n",
+        "\n",
+        "    # Hard cap on how much a single question may borrow from the buffer.\n",
+        "    MAX_BORROWABLE = 95\n",
+        "\n",
+        "    def __init__(\n",
+        "        self,\n",
+        "        b_max: float = 0.85,\n",
+        "        k: float = 6.0,\n",
+        "        threshold: float = 0.4,\n",
+        "        total_questions: int = 50,\n",
+        "        total_available_time: int = 15720,\n",
+        "    ):\n",
+        "\n",
+        "        self.b_max = b_max\n",
+        "        self.k = k\n",
+        "        self.threshold = threshold\n",
+        "        self.total_questions = total_questions\n",
+        "        self.total_available_time = total_available_time\n",
+        "\n",
+        "    def compute_time_pressure(\n",
+        "        self,\n",
+        "        remaining_time: float,\n",
+        "        questions_completed: int,\n",
+        "        global_buffer: float = 0.0,\n",
+        "    ) -> float:\n",
+        "        \"\"\"\n",
+        "        Ratio of the planned per-question pace to the pace still available.\n",
+        "\n",
+        "        The result is clamped to [0.3, 1.5]; 1.5 is returned outright when no\n",
+        "        time remains. ``global_buffer`` is accepted for signature compatibility\n",
+        "        and is not used.\n",
+        "        \"\"\"\n",
+        "        if remaining_time <= 0:\n",
+        "            return 1.5\n",
+        "        remaining_q = max(1, self.total_questions - questions_completed)\n",
+        "        # max(1, ...) keeps a zero question count from dividing by zero.\n",
+        "        ideal_pace = self.total_available_time / max(1, self.total_questions)\n",
+        "        available_pace = remaining_time / remaining_q\n",
+        "        pressure = ideal_pace / available_pace\n",
+        "        return max(0.3, min(1.5, pressure))\n",
+        "\n",
+        "    def allocate_time(\n",
+        "        self,\n",
+        "        remaining_time: float,\n",
+        "        questions_completed: int,\n",
+        "        global_buffer: float = 0.0,\n",
+        "        allowed_time : float = 320,\n",
+        "      ) -> dict:\n",
+        "        \"\"\"\n",
+        "        Split the buffer into an amount borrowed for this question and a remainder.\n",
+        "\n",
+        "        Parameters\n",
+        "        ----------\n",
+        "        remaining_time : float\n",
+        "            Time left for the whole run.\n",
+        "        questions_completed : int\n",
+        "            Number of questions already answered.\n",
+        "        global_buffer : float\n",
+        "            Spare time currently available for borrowing.\n",
+        "        allowed_time : float\n",
+        "            Base inference time budget for this question.\n",
+        "\n",
+        "        Returns\n",
+        "        -------\n",
+        "        dict\n",
+        "            Keys: effective_inference (allowed_time + borrowed),\n",
+        "                  global_buffer (buffer left after borrowing),\n",
+        "                  borrowed, borrow_fraction (1 / pressure, informational).\n",
+        "        \"\"\"\n",
+        "        pressure = self.compute_time_pressure(\n",
+        "            remaining_time,\n",
+        "            questions_completed,\n",
+        "            global_buffer,\n",
+        "        )\n",
+        "        borrow_fraction = 1/pressure\n",
+        "        print(\"borrow fraction\", borrow_fraction)\n",
+        "        # NOTE(review): the amount is scaled by `pressure`, not by `borrow_fraction`;\n",
+        "        # confirm which of the two was intended.\n",
+        "        borrowed = min(pressure * global_buffer, self.MAX_BORROWABLE)\n",
+        "        # pressure can exceed 1, so clamp: never borrow more than the buffer\n",
+        "        # holds, and never a negative amount when the buffer is overdrawn.\n",
+        "        borrowed = max(0.0, min(borrowed, global_buffer))\n",
+        "\n",
+        "        return {\n",
+        "            \"effective_inference\": allowed_time + borrowed,\n",
+        "            \"global_buffer\": global_buffer - borrowed,\n",
+        "            \"borrowed\": borrowed,\n",
+        "            \"borrow_fraction\": borrow_fraction,\n",
+        "        }\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "ac15f646",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:01.030935Z",
+          "iopub.status.busy": "2026-04-12T05:06:01.030736Z",
+          "iopub.status.idle": "2026-04-12T05:06:01.034205Z",
+          "shell.execute_reply": "2026-04-12T05:06:01.033802Z"
+        },
+        "papermill": {
+          "duration": 0.009979,
+          "end_time": "2026-04-12T05:06:01.035087+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:01.025108+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "ac15f646"
+      },
+      "outputs": [],
+      "source": [
+        "class TimeBudget:\n",
+        "    \"\"\"Countdown of ``total_seconds`` measured with ``time.perf_counter()`` from construction.\"\"\"\n",
+        "\n",
+        "    def __init__(self, total_seconds):\n",
+        "        self.start = time.perf_counter()\n",
+        "        self.deadline = self.start + total_seconds\n",
+        "\n",
+        "    @property\n",
+        "    def remaining(self):\n",
+        "        \"\"\"Seconds left until the deadline, never below zero.\"\"\"\n",
+        "        left = self.deadline - time.perf_counter()\n",
+        "        return left if left > 0 else 0\n",
+        "\n",
+        "    @property\n",
+        "    def elapsed(self):\n",
+        "        \"\"\"Seconds passed since construction.\"\"\"\n",
+        "        now = time.perf_counter()\n",
+        "        return now - self.start\n",
+        "\n",
+        "    @property\n",
+        "    def expired(self):\n",
+        "        \"\"\"True once no time is left.\"\"\"\n",
+        "        time_left = self.remaining\n",
+        "        return time_left <= 0\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "2a278fab",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:01.045591Z",
+          "iopub.status.busy": "2026-04-12T05:06:01.045424Z",
+          "iopub.status.idle": "2026-04-12T05:06:01.053193Z",
+          "shell.execute_reply": "2026-04-12T05:06:01.052801Z"
+        },
+        "papermill": {
+          "duration": 0.014099,
+          "end_time": "2026-04-12T05:06:01.054027+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:01.039928+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "2a278fab"
+      },
+      "outputs": [],
+      "source": [
+        "class Pipeline:\n",
+        "    \"\"\"Answer one problem within a time budget and settle the shared time buffer.\"\"\"\n",
+        "\n",
+        "    def __init__(self):\n",
+        "        self.k = 1               # multiplier on max_batch_size giving the expected number of answers\n",
+        "        self.budget_seconds = 0  # budget (seconds) granted to the most recent question\n",
+        "\n",
+        "    async def get_prediction(self, problem_text):\n",
+        "        \"\"\"Sample answers for ``problem_text`` and return the voted prediction.\n",
+        "\n",
+        "        The budget is 320 s plus whatever ``borrower.allocate_time`` grants from\n",
+        "        ``global_buffer``. Answers are consumed as they arrive; from the third\n",
+        "        answer on an early-stop vote may end the run, and a final vote is taken\n",
+        "        when all expected answers are in, when the stream ends, or on timeout.\n",
+        "        On exit the difference between budget and elapsed time is added to\n",
+        "        ``global_buffer`` (negative when the budget was overspent).\n",
+        "\n",
+        "        Returns the value chosen by ``Result.get_best_answer``, or None when an\n",
+        "        unexpected error occurs or the stream ends before any answer arrived.\n",
+        "        \"\"\"\n",
+        "        global global_buffer, i, borrower, max_batch_size,last_30, sampling_param\n",
+        "        thresh_hold = 3  # minimum number of answers before an early-stop vote\n",
+        "        num_done = 0\n",
+        "        max_generation_count = self.k*max_batch_size\n",
+        "        answer_list = []\n",
+        "        finalanswerobj = Result()\n",
+        "        print(\"Pipeline step 1\")\n",
+        "        allowed_time = 320\n",
+        "        self.budget_seconds = allowed_time\n",
+        "        if global_buffer> 0:\n",
+        "            result = borrower.allocate_time(\n",
+        "                remaining_time = get_global_remaining(),\n",
+        "                questions_completed = i,\n",
+        "                allowed_time = allowed_time,\n",
+        "                global_buffer = global_buffer\n",
+        "            )\n",
+        "\n",
+        "            self.budget_seconds = result[\"effective_inference\"]\n",
+        "            global_buffer = result[\"global_buffer\"]\n",
+        "            print(f'borrowed={result[\"borrowed\"]:.0f}')\n",
+        "        print(f\"Budget: base={allowed_time:.0f}s \"\n",
+        "                          f\"= {self.budget_seconds:.0f}s (global remaining: {get_global_remaining():.0f}s)\")\n",
+        "        budgetobj = TimeBudget(self.budget_seconds)\n",
+        "\n",
+        "        clientobj = ClientClass(default_prompt)\n",
+        "        deadline = max(0, budgetobj.remaining)\n",
+        "        operation_start_time = time.perf_counter()\n",
+        "        print(\"Deadline is\", deadline)\n",
+        "        gen = clientobj.predict_for_question(problem_text)\n",
+        "        try:\n",
+        "            async with asyncio.timeout(deadline):\n",
+        "                async for answer in gen:\n",
+        "                    answer_list.append(answer)\n",
+        "                    print(\"Answer list on timeout is:-\")\n",
+        "                    print(answer_list)\n",
+        "                    num_done = len(answer_list)\n",
+        "                    if num_done >= thresh_hold and num_done < max_generation_count:\n",
+        "                        prediction, early_stop_flag = finalanswerobj.get_best_answer(answer_list, num_done, True)\n",
+        "                        if early_stop_flag:\n",
+        "                            return prediction\n",
+        "\n",
+        "                    elif num_done == max_generation_count:\n",
+        "                        prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
+        "                        return prediction\n",
+        "\n",
+        "            # The stream ended without reaching max_generation_count: vote on what\n",
+        "            # was collected instead of silently discarding it.\n",
+        "            if answer_list:\n",
+        "                prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
+        "                return prediction\n",
+        "            return None\n",
+        "\n",
+        "        except (TimeoutError, asyncio.TimeoutError):\n",
+        "            traceback.print_exc()\n",
+        "            prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
+        "            return prediction\n",
+        "\n",
+        "        except Exception as e:\n",
+        "            traceback.print_exc()\n",
+        "            print(f\"UNEXPECTED ERROR: {type(e).__name__} {e}\")\n",
+        "            if answer_list:\n",
+        "                prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
+        "                return prediction\n",
+        "            return None\n",
+        "\n",
+        "        finally:\n",
+        "            # A failure while closing the generator must not skip the buffer accounting.\n",
+        "            try:\n",
+        "                await gen.aclose()\n",
+        "            except Exception:\n",
+        "                traceback.print_exc()\n",
+        "            print(\"Operation duration\", time.perf_counter()-operation_start_time)\n",
+        "            # Credit unused budget to the buffer, or debit the overrun.\n",
+        "            spent = budgetobj.elapsed\n",
+        "            global_buffer += self.budget_seconds - spent\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "e931c8db",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:01.064603Z",
+          "iopub.status.busy": "2026-04-12T05:06:01.064428Z",
+          "iopub.status.idle": "2026-04-12T05:06:01.068387Z",
+          "shell.execute_reply": "2026-04-12T05:06:01.068000Z"
+        },
+        "papermill": {
+          "duration": 0.010469,
+          "end_time": "2026-04-12T05:06:01.069242+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:01.058773+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "e931c8db"
+      },
+      "outputs": [],
+      "source": [
+        "def predict(id_: pl.Series, problem: pl.Series) -> pl.DataFrame | pd.DataFrame:\n",
+        "    \"\"\"Answer a single problem and return a one-row frame with columns ``id`` and ``answer``.\n",
+        "\n",
+        "    Waits for the model server on first use. When fewer than 30 seconds of the\n",
+        "    global budget remain, or when the pipeline yields no prediction, the fixed\n",
+        "    fallback answer 29443 is returned.\n",
+        "    \"\"\"\n",
+        "    global server_started, i\n",
+        "    pipelineobj = Pipeline()\n",
+        "    if server_started is False:\n",
+        "        server_started = wait_for_server()\n",
+        "\n",
+        "    id_ = id_.item(0)\n",
+        "    problem_text: str = problem.item(0)\n",
+        "\n",
+        "    # Too close to the global deadline to attempt inference: answer immediately.\n",
+        "    if get_global_remaining() < 30:\n",
+        "        return pl.DataFrame({\"id\": id_, \"answer\": 29443})\n",
+        "\n",
+        "    event_loop = asyncio.get_event_loop()\n",
+        "    prediction = event_loop.run_until_complete(pipelineobj.get_prediction(problem_text))\n",
+        "    if prediction is None:\n",
+        "        prediction = 29443\n",
+        "\n",
+        "    # Only questions that went through the pipeline are counted.\n",
+        "    i = i + 1\n",
+        "\n",
+        "    answer_frame = pl.DataFrame({\"id\": id_, \"answer\": prediction})\n",
+        "    print(\"Returned dataframe is \", answer_frame)\n",
+        "    return answer_frame\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "a7394047",
+      "metadata": {
+        "execution": {
+          "iopub.execute_input": "2026-04-12T05:06:01.079734Z",
+          "iopub.status.busy": "2026-04-12T05:06:01.079548Z",
+          "iopub.status.idle": "2026-04-12T05:17:09.993610Z",
+          "shell.execute_reply": "2026-04-12T05:17:09.993024Z"
+        },
+        "papermill": {
+          "duration": 668.920683,
+          "end_time": "2026-04-12T05:17:09.994734+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:06:01.074051+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "a7394047"
+      },
+      "outputs": [],
+      "source": [
+        "# Entry point: wrap `predict` in the Kaggle inference server and run it, either as\n",
+        "# the competition rerun (`serve`) or locally against a CSV (`run_local_gateway`).\n",
+        "# For local runs, change the CSV path passed to `run_local_gateway` below.\n",
+        "inference_server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(\n",
+        "    predict\n",
+        ")\n",
+        "# Pacing helper: 50 questions spread over whatever global time is left right now.\n",
+        "borrower = BufferBorrower(total_questions = 50, total_available_time = get_global_remaining())\n",
+        "if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):\n",
+        "    # You MUST call this within 15 minutes of the script starting. This is to\n",
+        "    # ensure a \"fast fail\" in case a bug prevents the inference server from starting.\n",
+        "    # Do anything that might take a long time (like model loading) in the predict\n",
+        "    # function, which has no time limit.\n",
+        "    try:\n",
+        "        start = time.perf_counter()\n",
+        "        inference_server.serve()\n",
+        "\n",
+        "    finally:\n",
+        "        # Always record wall time and the question counter, even if serving failed.\n",
+        "        finish =time.perf_counter()\n",
+        "        time_taken_ = finish-start\n",
+        "        print(\"Time Taken\",time_taken_)\n",
+        "        print(i)\n",
+        "        # Appended (not overwritten) so repeated runs accumulate in rerun.txt.\n",
+        "        with open(\"rerun.txt\", \"a\") as f:\n",
+        "            f.write(f\"Time taken: {time_taken_:.6f},Questions Completed: {i}\\n\")\n",
+        "\n",
+        "else:\n",
+        "\n",
+        "    # Local run: feed the competition test CSV through the same `predict` function.\n",
+        "    try:\n",
+        "        start = time.perf_counter()\n",
+        "        inference_server.run_local_gateway(\n",
+        "        ('/kaggle/input/competitions/ai-mathematical-olympiad-progress-prize-3/test.csv',))\n",
+        "\n",
+        "    finally:\n",
+        "        # Same bookkeeping as the rerun branch, appended to info.txt instead.\n",
+        "        finish = time.perf_counter()\n",
+        "        time_taken_ = finish - start\n",
+        "        print(\"Time Taken\",time_taken_ )\n",
+        "        with open(\"info.txt\", \"a\") as f:\n",
+        "            f.write(f\" Time taken: {time_taken_:.6f},Questions Completed: {i}\\n\")\n",
+        "        print(i)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "63d04159",
+      "metadata": {
+        "papermill": {
+          "duration": 0.00535,
+          "end_time": "2026-04-12T05:17:10.005304+00:00",
+          "exception": false,
+          "start_time": "2026-04-12T05:17:09.999954+00:00",
+          "status": "completed"
+        },
+        "tags": [],
+        "id": "63d04159"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "kaggle": {
+      "accelerator": "nvidiaH100",
+      "dataSources": [
+        {
+          "databundleVersionId": 14559231,
+          "sourceId": 118448,
+          "sourceType": "competition"
+        },
+        {
+          "databundleVersionId": 16263450,
+          "datasetId": 9820761,
+          "sourceId": 15353457,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 16223586,
+          "datasetId": 9797230,
+          "sourceId": 15317833,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 16583370,
+          "datasetId": 10017044,
+          "sourceId": 15647597,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 15942658,
+          "datasetId": 9642480,
+          "sourceId": 15061462,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 15744165,
+          "datasetId": 9520808,
+          "sourceId": 14881112,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 16607303,
+          "datasetId": 10033809,
+          "sourceId": 15670043,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 15706375,
+          "datasetId": 9495812,
+          "sourceId": 14846606,
+          "sourceType": "datasetVersion"
+        },
+        {
+          "databundleVersionId": 16606906,
+          "modelId": 641049,
+          "modelInstanceId": 629147,
+          "sourceId": 827437,
+          "sourceType": "modelInstanceVersion"
+        },
+        {
+          "sourceId": 303511002,
+          "sourceType": "kernelVersion"
+        },
+        {
+          "sourceId": 303518560,
+          "sourceType": "kernelVersion"
+        }
+      ],
+      "dockerImageVersionId": 31329,
+      "isGpuEnabled": true,
+      "isInternetEnabled": false,
+      "language": "python",
+      "sourceType": "notebook"
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.12"
+    },
+    "papermill": {
+      "default_parameters": {},
+      "duration": 933.409462,
+      "end_time": "2026-04-12T05:17:12.627119+00:00",
+      "environment_variables": {},
+      "exception": null,
+      "input_path": "__notebook__.ipynb",
+      "output_path": "__notebook__.ipynb",
+      "parameters": {},
+      "start_time": "2026-04-12T05:01:39.217657+00:00",
+      "version": "2.7.0"
+    },
+    "colab": {
+      "provenance": []
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 5
+}