HarpreetK committed on
Commit
773c534
·
verified ·
1 Parent(s): 03ec278

Upload gpt_oss_nostepback24.ipynb

Browse files
Files changed (1) hide show
  1. gpt_oss_nostepback24.ipynb +1717 -0
gpt_oss_nostepback24.ipynb ADDED
@@ -0,0 +1,1717 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "7a9b41cb",
6
+ "metadata": {
7
+ "editable": true,
8
+ "papermill": {
9
+ "duration": 0.004165,
10
+ "end_time": "2026-04-12T05:01:44.648318+00:00",
11
+ "exception": false,
12
+ "start_time": "2026-04-12T05:01:44.644153+00:00",
13
+ "status": "completed"
14
+ },
15
+ "tags": [],
16
+ "id": "7a9b41cb"
17
+ },
18
+ "source": [
19
+ "\n",
20
+ "# Setup The Environment"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "73e5d3dc",
27
+ "metadata": {
28
+ "execution": {
29
+ "iopub.execute_input": "2026-04-12T05:01:44.656112Z",
30
+ "iopub.status.busy": "2026-04-12T05:01:44.655548Z",
31
+ "iopub.status.idle": "2026-04-12T05:01:44.660512Z",
32
+ "shell.execute_reply": "2026-04-12T05:01:44.660110Z"
33
+ },
34
+ "papermill": {
35
+ "duration": 0.009642,
36
+ "end_time": "2026-04-12T05:01:44.661341+00:00",
37
+ "exception": false,
38
+ "start_time": "2026-04-12T05:01:44.651699+00:00",
39
+ "status": "completed"
40
+ },
41
+ "tags": [],
42
+ "id": "73e5d3dc"
43
+ },
44
+ "outputs": [],
45
+ "source": [
46
+ "# Track Overall Time\n",
47
+ "import time\n",
48
+ "global_deadline = time.perf_counter() + 5*3600\n",
49
+ "global_remaining = global_deadline - time.perf_counter()\n",
50
+ "cutoff_duration = global_remaining - 350\n",
51
+ "def get_global_remaining():\n",
52
+ " return max(0, global_deadline - time.perf_counter())"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": null,
58
+ "id": "5f9da4be",
59
+ "metadata": {
60
+ "execution": {
61
+ "iopub.execute_input": "2026-04-12T05:01:44.669660Z",
62
+ "iopub.status.busy": "2026-04-12T05:01:44.669504Z",
63
+ "iopub.status.idle": "2026-04-12T05:01:44.672401Z",
64
+ "shell.execute_reply": "2026-04-12T05:01:44.672047Z"
65
+ },
66
+ "papermill": {
67
+ "duration": 0.007624,
68
+ "end_time": "2026-04-12T05:01:44.673227+00:00",
69
+ "exception": false,
70
+ "start_time": "2026-04-12T05:01:44.665603+00:00",
71
+ "status": "completed"
72
+ },
73
+ "tags": [],
74
+ "id": "5f9da4be"
75
+ },
76
+ "outputs": [],
77
+ "source": [
78
+ "import os, sys\n",
79
+ "original_pythonpath = os.environ.get(\"PYTHONPATH\", \"\")\n",
80
+ "path1 = '/kaggle/input/datasets/hpkaur34/gptoss/Gpt-oss'\n",
81
+ "path2 = '/kaggle/usr/lib/notebooks/hpkaur34/install_utility_nemo_run/'\n",
82
+ "new_paths = f\"{path1}:{path2}\"\n",
83
+ "merged_pythonpath = f\"{new_paths}:{original_pythonpath}\" if original_pythonpath else new_paths\n",
84
+ "os.environ[\"PYTHONPATH\"] = merged_pythonpath\n",
85
+ "sys.path.append('/kaggle/input/datasets/hpkaur34/gptoss/Gpt-oss')\n",
86
+ "sys.path.append('/kaggle/usr/lib/notebooks/hpkaur34/install_utility_nemo_run/')"
87
+ ]
88
+ },
89
+ {
90
+ "cell_type": "code",
91
+ "execution_count": null,
92
+ "id": "66edb1b7",
93
+ "metadata": {
94
+ "execution": {
95
+ "iopub.execute_input": "2026-04-12T05:01:44.680830Z",
96
+ "iopub.status.busy": "2026-04-12T05:01:44.680652Z",
97
+ "iopub.status.idle": "2026-04-12T05:05:01.636413Z",
98
+ "shell.execute_reply": "2026-04-12T05:05:01.635973Z"
99
+ },
100
+ "papermill": {
101
+ "duration": 196.960959,
102
+ "end_time": "2026-04-12T05:05:01.637603+00:00",
103
+ "exception": false,
104
+ "start_time": "2026-04-12T05:01:44.676644+00:00",
105
+ "status": "completed"
106
+ },
107
+ "tags": [],
108
+ "id": "66edb1b7"
109
+ },
110
+ "outputs": [],
111
+ "source": [
112
+ "import subprocess\n",
113
+ "def set_env(input_archive, temp_dir):\n",
114
+ " if not os.path.exists(temp_dir):\n",
115
+ " os.makedirs(temp_dir, exist_ok=True)\n",
116
+ " subprocess.run(['tar', '-xzf', input_archive, '-C', temp_dir], check=True)\n",
117
+ " subprocess.run([\n",
118
+ " sys.executable,\n",
119
+ " '-m',\n",
120
+ " 'pip',\n",
121
+ " 'install',\n",
122
+ " '--no-index',\n",
123
+ " '--find-links',\n",
124
+ " f'{temp_dir}/wheels',\n",
125
+ " 'paramiko',\n",
126
+ " 'math_verify',\n",
127
+ " 'litellm',\n",
128
+ " 'flashinfer-python',\n",
129
+ " 'vllm==0.11.2',\n",
130
+ " 'openai_harmony',\n",
131
+ " ], check=False)\n",
132
+ "\n",
133
+ "try:\n",
134
+ " set_env(\n",
135
+ " input_archive='/kaggle/usr/lib/notebooks/hpkaur34/aimo_utility_copy/wheels.tar.gz',\n",
136
+ " temp_dir='/kaggle/tmp/setup'\n",
137
+ " )\n",
138
+ "except Exception as e:\n",
139
+ " print(f\"⚠️ set_env failed: {e}\")\n",
140
+ " print(\"Continuing execution...\")"
141
+ ]
142
+ },
143
+ {
144
+ "cell_type": "code",
145
+ "execution_count": null,
146
+ "id": "cd9b33b0",
147
+ "metadata": {
148
+ "execution": {
149
+ "iopub.execute_input": "2026-04-12T05:05:01.648387Z",
150
+ "iopub.status.busy": "2026-04-12T05:05:01.648226Z",
151
+ "iopub.status.idle": "2026-04-12T05:05:05.396043Z",
152
+ "shell.execute_reply": "2026-04-12T05:05:05.395545Z"
153
+ },
154
+ "papermill": {
155
+ "duration": 3.754774,
156
+ "end_time": "2026-04-12T05:05:05.397566+00:00",
157
+ "exception": false,
158
+ "start_time": "2026-04-12T05:05:01.642792+00:00",
159
+ "status": "completed"
160
+ },
161
+ "tags": [],
162
+ "id": "cd9b33b0"
163
+ },
164
+ "outputs": [],
165
+ "source": [
166
+ "import os\n",
167
+ "os.environ[\"CUDA_LAUNCH_BLOCKING\"] = \"1\"\n",
168
+ "import torch"
169
+ ]
170
+ },
171
+ {
172
+ "cell_type": "code",
173
+ "execution_count": null,
174
+ "id": "162dc6d1",
175
+ "metadata": {
176
+ "execution": {
177
+ "iopub.execute_input": "2026-04-12T05:05:05.408255Z",
178
+ "iopub.status.busy": "2026-04-12T05:05:05.408020Z",
179
+ "iopub.status.idle": "2026-04-12T05:05:05.411789Z",
180
+ "shell.execute_reply": "2026-04-12T05:05:05.411435Z"
181
+ },
182
+ "papermill": {
183
+ "duration": 0.009988,
184
+ "end_time": "2026-04-12T05:05:05.412633+00:00",
185
+ "exception": false,
186
+ "start_time": "2026-04-12T05:05:05.402645+00:00",
187
+ "status": "completed"
188
+ },
189
+ "tags": [],
190
+ "id": "162dc6d1"
191
+ },
192
+ "outputs": [],
193
+ "source": [
194
+ "\"\"\"\n",
195
+ "import logging\n",
196
+ "logging.basicConfig(level=logging.DEBUG)\n",
197
+ "\"\"\""
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "id": "ddfb0193",
204
+ "metadata": {
205
+ "execution": {
206
+ "iopub.execute_input": "2026-04-12T05:05:05.422698Z",
207
+ "iopub.status.busy": "2026-04-12T05:05:05.422543Z",
208
+ "iopub.status.idle": "2026-04-12T05:05:07.847229Z",
209
+ "shell.execute_reply": "2026-04-12T05:05:07.846480Z"
210
+ },
211
+ "papermill": {
212
+ "duration": 2.431309,
213
+ "end_time": "2026-04-12T05:05:07.848706+00:00",
214
+ "exception": false,
215
+ "start_time": "2026-04-12T05:05:05.417397+00:00",
216
+ "status": "completed"
217
+ },
218
+ "tags": [],
219
+ "id": "ddfb0193"
220
+ },
221
+ "outputs": [],
222
+ "source": [
223
+ "import asyncio\n",
224
+ "import torch\n",
225
+ "import subprocess\n",
226
+ "import warnings\n",
227
+ "import glob\n",
228
+ "import kaggle_evaluation.aimo_3_inference_server\n",
229
+ "import pandas as pd\n",
230
+ "import traceback\n",
231
+ "import nest_asyncio\n",
232
+ "import httpx\n",
233
+ "import re\n",
234
+ "import time\n",
235
+ "import copy\n",
236
+ "import json\n",
237
+ "import requests\n",
238
+ "import pandas as pd\n",
239
+ "import polars as pl\n",
240
+ "from collections import Counter\n",
241
+ "from typing import List\n",
242
+ "import secrets\n",
243
+ "pd.set_option('display.max_colwidth', None)\n",
244
+ "warnings.filterwarnings(\"ignore\", category=SyntaxWarning)\n",
245
+ "nest_asyncio.apply()\n",
246
+ "os.environ[\"TORCH_COMPILE_DISABLE\"] = \"1\"\n",
247
+ "os.environ[\"TORCHDYNAMO_DISABLE\"] = \"1\"\n",
248
+ "os.environ['TRANSFORMERS_NO_FLAX'] = '1'\n",
249
+ "os.environ['CUDA_VISIBLE_DEVICES'] = '0'\n",
250
+ "os.environ['TOKENIZERS_PARALLELISM'] = 'false'\n",
251
+ "os.environ['TRITON_PTXAS_PATH'] = '/usr/local/cuda/bin/ptxas'\n",
252
+ "os.environ['TIKTOKEN_RS_CACHE_DIR']= \"/kaggle/input/datasets/hpkaur34/harmony-encoding\"\n",
253
+ "os.environ[\"TORCH_CUDA_ARCH_LIST\"] = '9.0'\n",
254
+ "#os.environ[\"VLLM_USE_FLASHINFER_SAMPLER\"]= \"1\"\n",
255
+ "# Below will change in kaggle\n",
256
+ "from collections import Counter, defaultdict"
257
+ ]
258
+ },
259
+ {
260
+ "cell_type": "code",
261
+ "execution_count": null,
262
+ "id": "aad4e792",
263
+ "metadata": {
264
+ "execution": {
265
+ "iopub.execute_input": "2026-04-12T05:05:07.859567Z",
266
+ "iopub.status.busy": "2026-04-12T05:05:07.859290Z",
267
+ "iopub.status.idle": "2026-04-12T05:05:07.861900Z",
268
+ "shell.execute_reply": "2026-04-12T05:05:07.861517Z"
269
+ },
270
+ "papermill": {
271
+ "duration": 0.008918,
272
+ "end_time": "2026-04-12T05:05:07.862720+00:00",
273
+ "exception": false,
274
+ "start_time": "2026-04-12T05:05:07.853802+00:00",
275
+ "status": "completed"
276
+ },
277
+ "tags": [],
278
+ "id": "aad4e792"
279
+ },
280
+ "outputs": [],
281
+ "source": [
282
+ "# This will change in kaggle\n",
283
+ "os.environ[\"TORCHINDUCTOR_CACHE_DIR\"] = \"torch_cache\"\n"
284
+ ]
285
+ },
286
+ {
287
+ "cell_type": "code",
288
+ "execution_count": null,
289
+ "id": "3a837b51",
290
+ "metadata": {
291
+ "execution": {
292
+ "iopub.execute_input": "2026-04-12T05:05:07.873064Z",
293
+ "iopub.status.busy": "2026-04-12T05:05:07.872913Z",
294
+ "iopub.status.idle": "2026-04-12T05:05:53.870524Z",
295
+ "shell.execute_reply": "2026-04-12T05:05:53.870030Z"
296
+ },
297
+ "papermill": {
298
+ "duration": 46.004343,
299
+ "end_time": "2026-04-12T05:05:53.871905+00:00",
300
+ "exception": false,
301
+ "start_time": "2026-04-12T05:05:07.867562+00:00",
302
+ "status": "completed"
303
+ },
304
+ "tags": [],
305
+ "id": "3a837b51"
306
+ },
307
+ "outputs": [],
308
+ "source": [
309
+ "from nemo_skills.code_execution.sandbox import get_sandbox\n",
310
+ "from nemo_skills.inference.model import get_code_execution_model\n",
311
+ "from nemo_skills.prompt.utils import get_prompt\n",
312
+ "from nemo_skills.inference.model import get_model"
313
+ ]
314
+ },
315
+ {
316
+ "cell_type": "markdown",
317
+ "id": "b69b4ab8",
318
+ "metadata": {
319
+ "papermill": {
320
+ "duration": 0.005047,
321
+ "end_time": "2026-04-12T05:05:53.882182+00:00",
322
+ "exception": false,
323
+ "start_time": "2026-04-12T05:05:53.877135+00:00",
324
+ "status": "completed"
325
+ },
326
+ "tags": [],
327
+ "id": "b69b4ab8"
328
+ },
329
+ "source": [
330
+ "# Configuration Parameters"
331
+ ]
332
+ },
333
+ {
334
+ "cell_type": "code",
335
+ "execution_count": null,
336
+ "id": "e29f99c0",
337
+ "metadata": {
338
+ "execution": {
339
+ "iopub.execute_input": "2026-04-12T05:05:53.892569Z",
340
+ "iopub.status.busy": "2026-04-12T05:05:53.892197Z",
341
+ "iopub.status.idle": "2026-04-12T05:05:53.895885Z",
342
+ "shell.execute_reply": "2026-04-12T05:05:53.895524Z"
343
+ },
344
+ "papermill": {
345
+ "duration": 0.009849,
346
+ "end_time": "2026-04-12T05:05:53.896584+00:00",
347
+ "exception": false,
348
+ "start_time": "2026-04-12T05:05:53.886735+00:00",
349
+ "status": "completed"
350
+ },
351
+ "tags": [],
352
+ "id": "e29f99c0"
353
+ },
354
+ "outputs": [],
355
+ "source": [
356
+ "host = \"127.0.0.1\"\n",
357
+ "port = 5000\n",
358
+ "tp_size = 1\n",
359
+ "max_public = 10\n",
360
+ "max_tokens = 80000\n",
361
+ "max_input_tokens = 1800\n",
362
+ "tokens_to_generate = 78200 - 10\n",
363
+ "max_batch_size = 8\n",
364
+ "timeout_seconds = 300\n",
365
+ "global_buffer = 350\n",
366
+ "finish_at_last_n = 2\n",
367
+ "max_code_output_characters = 1100\n",
368
+ "code_execution_timeout = 5\n",
369
+ "max_code_executions = 125\n",
370
+ "g_score = 0\n",
371
+ "g_count = 0\n",
372
+ "prompt_score = Counter()\n",
373
+ "sampling_params = {\n",
374
+ " \"tokens_to_generate\": tokens_to_generate,\n",
375
+ " \"temperature\": 1, # 0.2,\n",
376
+ " \"top_p\": 1,\n",
377
+ "}\n",
378
+ "\n",
379
+ "thoughts = [\"\"] * 50\n",
380
+ "thoughts = thoughts[:max_batch_size]\n",
381
+ "i = 0"
382
+ ]
383
+ },
384
+ {
385
+ "cell_type": "code",
386
+ "execution_count": null,
387
+ "id": "78bd61b1",
388
+ "metadata": {
389
+ "execution": {
390
+ "iopub.execute_input": "2026-04-12T05:05:53.906656Z",
391
+ "iopub.status.busy": "2026-04-12T05:05:53.906492Z",
392
+ "iopub.status.idle": "2026-04-12T05:05:53.908783Z",
393
+ "shell.execute_reply": "2026-04-12T05:05:53.908391Z"
394
+ },
395
+ "papermill": {
396
+ "duration": 0.008556,
397
+ "end_time": "2026-04-12T05:05:53.909622+00:00",
398
+ "exception": false,
399
+ "start_time": "2026-04-12T05:05:53.901066+00:00",
400
+ "status": "completed"
401
+ },
402
+ "tags": [],
403
+ "id": "78bd61b1"
404
+ },
405
+ "outputs": [],
406
+ "source": [
407
+ "model_path = \"/kaggle/input/models/hpkaur34/gpt-oss-120b/transformers/default/1\""
408
+ ]
409
+ },
410
+ {
411
+ "cell_type": "code",
412
+ "execution_count": null,
413
+ "id": "f15f7036",
414
+ "metadata": {
415
+ "execution": {
416
+ "iopub.execute_input": "2026-04-12T05:05:53.919963Z",
417
+ "iopub.status.busy": "2026-04-12T05:05:53.919804Z",
418
+ "iopub.status.idle": "2026-04-12T05:05:53.922192Z",
419
+ "shell.execute_reply": "2026-04-12T05:05:53.921752Z"
420
+ },
421
+ "papermill": {
422
+ "duration": 0.008918,
423
+ "end_time": "2026-04-12T05:05:53.923346+00:00",
424
+ "exception": false,
425
+ "start_time": "2026-04-12T05:05:53.914428+00:00",
426
+ "status": "completed"
427
+ },
428
+ "tags": [],
429
+ "id": "f15f7036"
430
+ },
431
+ "outputs": [],
432
+ "source": [
433
+ "import json"
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "markdown",
438
+ "id": "848f046a",
439
+ "metadata": {
440
+ "papermill": {
441
+ "duration": 0.004638,
442
+ "end_time": "2026-04-12T05:05:53.932771+00:00",
443
+ "exception": false,
444
+ "start_time": "2026-04-12T05:05:53.928133+00:00",
445
+ "status": "completed"
446
+ },
447
+ "tags": [],
448
+ "id": "848f046a"
449
+ },
450
+ "source": [
451
+ "# Start Server - Load Model & Sandbox"
452
+ ]
453
+ },
454
+ {
455
+ "cell_type": "code",
456
+ "execution_count": null,
457
+ "id": "a36cbdd9",
458
+ "metadata": {
459
+ "execution": {
460
+ "iopub.execute_input": "2026-04-12T05:05:53.943233Z",
461
+ "iopub.status.busy": "2026-04-12T05:05:53.943062Z",
462
+ "iopub.status.idle": "2026-04-12T05:05:53.946339Z",
463
+ "shell.execute_reply": "2026-04-12T05:05:53.945949Z"
464
+ },
465
+ "papermill": {
466
+ "duration": 0.010016,
467
+ "end_time": "2026-04-12T05:05:53.947523+00:00",
468
+ "exception": false,
469
+ "start_time": "2026-04-12T05:05:53.937507+00:00",
470
+ "status": "completed"
471
+ },
472
+ "tags": [],
473
+ "id": "a36cbdd9"
474
+ },
475
+ "outputs": [],
476
+ "source": [
477
+ "server_started = False\n",
478
+ "def load_model():\n",
479
+ " cmd = [\n",
480
+ " \"python\",\n",
481
+ " \"-m\",\n",
482
+ " \"nemo_skills.inference.server.serve_vllm\",\n",
483
+ " f\"--model={model_path}\",\n",
484
+ " \"--port=5000\",\n",
485
+ " \"--num_gpus=1\",\n",
486
+ " \"--max_model_len=80000\",\n",
487
+ " \"--max_num_batched_tokens=65000\",\n",
488
+ " \"--max_num_seqs=13\",\n",
489
+ " \"--max-cudagraph-capture-size=2048\",\n",
490
+ " \"--gpu_memory_utilization=0.96\",\n",
491
+ " \"--kv_cache_dtype=fp8_e4m3\",\n",
492
+ " \"--stream-interval=200\",\n",
493
+ " \"--enable-prefix-caching\",\n",
494
+ " \"--uvicorn-log-level debug\",\n",
495
+ " \"--enable-log-requests\",\n",
496
+ " \"--enable-log-outputs\",\n",
497
+ " \"--async-scheduling\",\n",
498
+ " ]\n",
499
+ "\n",
500
+ " log_file = open(\"vllm.log\", \"w\")\n",
501
+ " vllm_server = subprocess.Popen(\n",
502
+ " cmd,\n",
503
+ " stdout=log_file,\n",
504
+ " stderr=log_file,\n",
505
+ " text=True,\n",
506
+ " bufsize=1 # line-buffered\n",
507
+ " )\n",
508
+ " return vllm_server"
509
+ ]
510
+ },
511
+ {
512
+ "cell_type": "code",
513
+ "execution_count": null,
514
+ "id": "8824bf5d",
515
+ "metadata": {
516
+ "execution": {
517
+ "iopub.execute_input": "2026-04-12T05:05:53.957943Z",
518
+ "iopub.status.busy": "2026-04-12T05:05:53.957786Z",
519
+ "iopub.status.idle": "2026-04-12T05:05:53.960536Z",
520
+ "shell.execute_reply": "2026-04-12T05:05:53.960150Z"
521
+ },
522
+ "papermill": {
523
+ "duration": 0.008917,
524
+ "end_time": "2026-04-12T05:05:53.961372+00:00",
525
+ "exception": false,
526
+ "start_time": "2026-04-12T05:05:53.952455+00:00",
527
+ "status": "completed"
528
+ },
529
+ "tags": [],
530
+ "id": "8824bf5d"
531
+ },
532
+ "outputs": [],
533
+ "source": [
534
+ "vllm_server=load_model()"
535
+ ]
536
+ },
537
+ {
538
+ "cell_type": "code",
539
+ "execution_count": null,
540
+ "id": "563c5247",
541
+ "metadata": {
542
+ "execution": {
543
+ "iopub.execute_input": "2026-04-12T05:05:53.971887Z",
544
+ "iopub.status.busy": "2026-04-12T05:05:53.971705Z",
545
+ "iopub.status.idle": "2026-04-12T05:05:53.974875Z",
546
+ "shell.execute_reply": "2026-04-12T05:05:53.974510Z"
547
+ },
548
+ "papermill": {
549
+ "duration": 0.009755,
550
+ "end_time": "2026-04-12T05:05:53.976064+00:00",
551
+ "exception": false,
552
+ "start_time": "2026-04-12T05:05:53.966309+00:00",
553
+ "status": "completed"
554
+ },
555
+ "tags": [],
556
+ "id": "563c5247"
557
+ },
558
+ "outputs": [],
559
+ "source": [
560
+ "def wait_for_server(url=f\"http://{host}:{port}\", timeout=1200):\n",
561
+ " start = time.perf_counter()\n",
562
+ " while True:\n",
563
+ " try:\n",
564
+ " r = requests.get(f\"{url}/docs\")\n",
565
+ " if r.status_code == 200:\n",
566
+ " print(\"✅ Server is ready\",time.perf_counter()-start)\n",
567
+ " return True\n",
568
+ " except Exception:\n",
569
+ " pass\n",
570
+ "\n",
571
+ " if time.perf_counter() - start > timeout:\n",
572
+ " raise TimeoutError(\"Server did not start in time\")\n",
573
+ "\n",
574
+ " time.sleep(1)"
575
+ ]
576
+ },
577
+ {
578
+ "cell_type": "code",
579
+ "execution_count": null,
580
+ "id": "33c6222d",
581
+ "metadata": {
582
+ "execution": {
583
+ "iopub.execute_input": "2026-04-12T05:05:53.986603Z",
584
+ "iopub.status.busy": "2026-04-12T05:05:53.986295Z",
585
+ "iopub.status.idle": "2026-04-12T05:05:53.988966Z",
586
+ "shell.execute_reply": "2026-04-12T05:05:53.988576Z"
587
+ },
588
+ "papermill": {
589
+ "duration": 0.009033,
590
+ "end_time": "2026-04-12T05:05:53.989770+00:00",
591
+ "exception": false,
592
+ "start_time": "2026-04-12T05:05:53.980737+00:00",
593
+ "status": "completed"
594
+ },
595
+ "tags": [],
596
+ "id": "33c6222d"
597
+ },
598
+ "outputs": [],
599
+ "source": [
600
+ "def sandbox_server():\n",
601
+ " log_file = open(\"sandbox.log\", \"w\")\n",
602
+ " sandbox_process = subprocess.Popen(\n",
603
+ " [\"python\", \"-m\", \"nemo_skills.code_execution.local_sandbox.local_sandbox_server\"],\n",
604
+ " stdout=log_file,\n",
605
+ " stderr=log_file,\n",
606
+ " text=True,\n",
607
+ " bufsize=1)\n",
608
+ "\n",
609
+ " time.sleep(3)"
610
+ ]
611
+ },
612
+ {
613
+ "cell_type": "code",
614
+ "execution_count": null,
615
+ "id": "75ac913f",
616
+ "metadata": {
617
+ "execution": {
618
+ "iopub.execute_input": "2026-04-12T05:05:53.999737Z",
619
+ "iopub.status.busy": "2026-04-12T05:05:53.999550Z",
620
+ "iopub.status.idle": "2026-04-12T05:05:59.047417Z",
621
+ "shell.execute_reply": "2026-04-12T05:05:59.046670Z"
622
+ },
623
+ "papermill": {
624
+ "duration": 5.054699,
625
+ "end_time": "2026-04-12T05:05:59.048988+00:00",
626
+ "exception": false,
627
+ "start_time": "2026-04-12T05:05:53.994289+00:00",
628
+ "status": "completed"
629
+ },
630
+ "tags": [],
631
+ "id": "75ac913f"
632
+ },
633
+ "outputs": [],
634
+ "source": [
635
+ "time.sleep(2)\n",
636
+ "sandbox_server()\n",
637
+ "sandbox = get_sandbox() # localhost by default"
638
+ ]
639
+ },
640
+ {
641
+ "cell_type": "markdown",
642
+ "id": "0f199401",
643
+ "metadata": {
644
+ "papermill": {
645
+ "duration": 0.004869,
646
+ "end_time": "2026-04-12T05:05:59.059135+00:00",
647
+ "exception": false,
648
+ "start_time": "2026-04-12T05:05:59.054266+00:00",
649
+ "status": "completed"
650
+ },
651
+ "tags": [],
652
+ "id": "0f199401"
653
+ },
654
+ "source": [
655
+ "# Prompt Types and Updating Prompt"
656
+ ]
657
+ },
658
+ {
659
+ "cell_type": "code",
660
+ "execution_count": null,
661
+ "id": "ba16a0bb",
662
+ "metadata": {
663
+ "execution": {
664
+ "iopub.execute_input": "2026-04-12T05:05:59.070015Z",
665
+ "iopub.status.busy": "2026-04-12T05:05:59.069827Z",
666
+ "iopub.status.idle": "2026-04-12T05:05:59.073193Z",
667
+ "shell.execute_reply": "2026-04-12T05:05:59.072795Z"
668
+ },
669
+ "papermill": {
670
+ "duration": 0.009911,
671
+ "end_time": "2026-04-12T05:05:59.074008+00:00",
672
+ "exception": false,
673
+ "start_time": "2026-04-12T05:05:59.064097+00:00",
674
+ "status": "completed"
675
+ },
676
+ "tags": [],
677
+ "id": "ba16a0bb"
678
+ },
679
+ "outputs": [],
680
+ "source": [
681
+ "default_prompt = (\n",
682
+ " 'You are an elite mathematical problem solver with expertise at the International '\n",
683
+ " 'Mathematical Olympiad (IMO) level. Your goal is to find the correct answer through '\n",
684
+ " 'rigorous mathematical reasoning.\\n\\n'\n",
685
+ "\n",
686
+ " '# Problem-Solving Approach:\\n'\n",
687
+ " '1. UNDERSTAND: Carefully read and rephrase the problem in your own words. '\n",
688
+ " 'Identify what is given, what needs to be found, and any constraints.\\n'\n",
689
+ " '2. EXPLORE: Consider multiple solution strategies. Think about relevant theorems, '\n",
690
+ " 'techniques, patterns, or analogous problems. Don\\'t commit to one approach immediately.\\n'\n",
691
+ " '3. PLAN: Select the most promising approach and outline key steps before executing.\\n'\n",
692
+ " '4. EXECUTE: Work through your solution methodically. Show all reasoning steps clearly.\\n'\n",
693
+ " '5. VERIFY: Check your answer by substituting back, testing edge cases, or using '\n",
694
+ " 'alternative methods. Ensure logical consistency throughout.\\n\\n'\n",
695
+ "\n",
696
+ " '# Mathematical Reasoning Principles:\\n'\n",
697
+ " '- Break complex problems into smaller, manageable sub-problems\\n'\n",
698
+ " '- Look for patterns, symmetries, and special cases that provide insight\\n'\n",
699
+ " '- Use concrete examples to build intuition before generalizing\\n'\n",
700
+ " '- Consider extreme cases and boundary conditions\\n'\n",
701
+ " '- If stuck, try working backwards from the desired result\\n'\n",
702
+ " '- Be willing to restart with a different approach if needed\\n\\n'\n",
703
+ "\n",
704
+ " '# Verification Requirements:\\n'\n",
705
+ " '- Cross-check arithmetic and algebraic manipulations\\n'\n",
706
+ " '- Verify that your solution satisfies all problem constraints\\n'\n",
707
+ " '- Test your answer with simple cases or special values when possible\\n'\n",
708
+ " '- Ensure dimensional consistency and reasonableness of the result\\n\\n'\n",
709
+ "\n",
710
+ " \"#RESPONSE FORMAT:\\n\\n\"\n",
711
+ " \"The final answer must be a non-negative integer.\\nInstead of the \\\\boxed{} format, use JSON format. Follow the instructions for the format:\"\n",
712
+ " ' \"Answer\": <non-negative integer>, \"Confidence\": <number between 0 and 1>\\n'\n",
713
+ " \"Do not output any additional reasoning after this JSON.\\n\"\n",
714
+ " \"Do not output any additional reasoning after this JSON.\\n\"\n",
715
+ " )\n",
716
+ ""
717
+ ]
718
+ },
719
+ {
720
+ "cell_type": "code",
721
+ "execution_count": null,
722
+ "id": "e61b75fe",
723
+ "metadata": {
724
+ "execution": {
725
+ "iopub.execute_input": "2026-04-12T05:05:59.084817Z",
726
+ "iopub.status.busy": "2026-04-12T05:05:59.084430Z",
727
+ "iopub.status.idle": "2026-04-12T05:06:00.071004Z",
728
+ "shell.execute_reply": "2026-04-12T05:06:00.070468Z"
729
+ },
730
+ "papermill": {
731
+ "duration": 0.993574,
732
+ "end_time": "2026-04-12T05:06:00.072353+00:00",
733
+ "exception": false,
734
+ "start_time": "2026-04-12T05:05:59.078779+00:00",
735
+ "status": "completed"
736
+ },
737
+ "tags": [],
738
+ "id": "e61b75fe"
739
+ },
740
+ "outputs": [],
741
+ "source": [
742
+ "# Below will change\n",
743
+ "system_message='{system_prompt}'\n",
744
+ "prompt_template = get_prompt(prompt_config='gpt-oss/math',system_message=system_message,tokenizer=model_path,code_tags=\"gpt-oss\")\n",
745
+ "chat_template_kwargs = {\n",
746
+ " \"builtin_tools\": [\"python\"],\n",
747
+ " \"reasoning_effort\":\"high\"\n",
748
+ "\n",
749
+ "}"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": null,
755
+ "id": "95a2110d",
756
+ "metadata": {
757
+ "execution": {
758
+ "iopub.execute_input": "2026-04-12T05:06:00.083120Z",
759
+ "iopub.status.busy": "2026-04-12T05:06:00.082940Z",
760
+ "iopub.status.idle": "2026-04-12T05:06:00.085743Z",
761
+ "shell.execute_reply": "2026-04-12T05:06:00.085363Z"
762
+ },
763
+ "papermill": {
764
+ "duration": 0.009146,
765
+ "end_time": "2026-04-12T05:06:00.086560+00:00",
766
+ "exception": false,
767
+ "start_time": "2026-04-12T05:06:00.077414+00:00",
768
+ "status": "completed"
769
+ },
770
+ "tags": [],
771
+ "id": "95a2110d"
772
+ },
773
+ "outputs": [],
774
+ "source": [
775
+ "def safe_concat(a, b,function_name):\n",
776
+ " if a is None or b is None:\n",
777
+ " raise ValueError(f\"Cannot concatenate: a={a}, b={b}, Error Raised from function {function_name}\")\n",
778
+ " return a + b"
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "markdown",
783
+ "id": "21d172dc",
784
+ "metadata": {
785
+ "papermill": {
786
+ "duration": 0.004736,
787
+ "end_time": "2026-04-12T05:06:00.096017+00:00",
788
+ "exception": false,
789
+ "start_time": "2026-04-12T05:06:00.091281+00:00",
790
+ "status": "completed"
791
+ },
792
+ "tags": [],
793
+ "id": "21d172dc"
794
+ },
795
+ "source": [
796
+ "# Data Extraction & Early Stopping"
797
+ ]
798
+ },
799
+ {
800
+ "cell_type": "code",
801
+ "execution_count": null,
802
+ "id": "e27ba473",
803
+ "metadata": {
804
+ "execution": {
805
+ "iopub.execute_input": "2026-04-12T05:06:00.106688Z",
806
+ "iopub.status.busy": "2026-04-12T05:06:00.106508Z",
807
+ "iopub.status.idle": "2026-04-12T05:06:00.113188Z",
808
+ "shell.execute_reply": "2026-04-12T05:06:00.112798Z"
809
+ },
810
+ "papermill": {
811
+ "duration": 0.013124,
812
+ "end_time": "2026-04-12T05:06:00.114007+00:00",
813
+ "exception": false,
814
+ "start_time": "2026-04-12T05:06:00.100883+00:00",
815
+ "status": "completed"
816
+ },
817
+ "tags": [],
818
+ "id": "e27ba473"
819
+ },
820
+ "outputs": [],
821
+ "source": [
822
+ "class Result:\n",
823
+ " def __init__(self):\n",
824
+ " self.early_stop_flag = False\n",
825
+ " def best_voted_answer(self):\n",
826
+ " return self.best_answer\n",
827
+ "\n",
828
+ " def majority_voting(self, answer_list):\n",
829
+ " count = defaultdict(float)\n",
830
+ " # Keep raw list separate; filter into valid_answers\n",
831
+ " self.answer_list = answer_list\n",
832
+ " self.valid_answers = [x[\"Answer\"] for x in self.answer_list if x[\"Answer\"] != -1]\n",
833
+ " print(\"Answer_list after popping -1\", self.valid_answers, \"%%%%\")\n",
834
+ "\n",
835
+ " # BUG FIX: set fallback when all answers are invalid\n",
836
+ " if len(self.valid_answers) == 0:\n",
837
+ " self.best_answer = None\n",
838
+ " self.best_count = 0\n",
839
+ " self.second_count = 0\n",
840
+ " self.sorted_answers = []\n",
841
+ " return\n",
842
+ "\n",
843
+ " for a in self.valid_answers:\n",
844
+ " count[a] += 1\n",
845
+ " self.sorted_answers = sorted(count.items(), key=lambda x: x[1], reverse=True)\n",
846
+ "\n",
847
+ " self.best_answer, self.best_count = self.sorted_answers[0]\n",
848
+ " self.second_count = self.sorted_answers[1][1] if len(self.sorted_answers) > 1 else 0\n",
849
+ "\n",
850
+ " if (\n",
851
+ " self.best_count == 1\n",
852
+ " and self.best_answer == 0\n",
853
+ " and len(self.sorted_answers) > 1\n",
854
+ " and self.sorted_answers[1] is not None\n",
855
+ " ):\n",
856
+ "\n",
857
+ " self.best_answer, self.best_count = self.sorted_answers[1]\n",
858
+ "\n",
859
+ "\n",
860
+ " def early_stop(self, answer_list, num_done):\n",
861
+ " print(\"Num_done is\",num_done)\n",
862
+ " self.num_done = num_done\n",
863
+ " self.majority_voting(answer_list)\n",
864
+ " n_valid = len(self.valid_answers)\n",
865
+ " best = self.best_count\n",
866
+ " gap = self.best_count - self.second_count\n",
867
+ " print(f\"Num done: {self.num_done}, Valid answers: {n_valid}, \"\n",
868
+ " f\"Best count: {best}, Second count: {self.second_count}\")\n",
869
+ "\n",
870
+ " if n_valid == 0:\n",
871
+ " return False\n",
872
+ "\n",
873
+ " if best >= 3 and gap >= 1:\n",
874
+ " self.early_stop_flag = True\n",
875
+ " print(f\">>> EARLY STOP at {self.num_done} completions | \"\n",
876
+ " f\"best={self.best_answer} (count={best}, gap={gap})\")\n",
877
+ "\n",
878
+ " return self.early_stop_flag\n",
879
+ "\n",
880
+ " def get_best_answer(self,answer_list, num_done, flag):\n",
881
+ " if not flag:\n",
882
+ " self.majority_voting(answer_list)\n",
883
+ " else:\n",
884
+ " self.early_stop(answer_list, num_done)\n",
885
+ " return self.best_voted_answer(), self.early_stop_flag\n"
886
+ ]
887
+ },
888
+ {
889
+ "cell_type": "code",
890
+ "execution_count": null,
891
+ "id": "08da144a",
892
+ "metadata": {
893
+ "execution": {
894
+ "iopub.execute_input": "2026-04-12T05:06:00.124730Z",
895
+ "iopub.status.busy": "2026-04-12T05:06:00.124556Z",
896
+ "iopub.status.idle": "2026-04-12T05:06:00.130593Z",
897
+ "shell.execute_reply": "2026-04-12T05:06:00.130188Z"
898
+ },
899
+ "papermill": {
900
+ "duration": 0.012614,
901
+ "end_time": "2026-04-12T05:06:00.131471+00:00",
902
+ "exception": false,
903
+ "start_time": "2026-04-12T05:06:00.118857+00:00",
904
+ "status": "completed"
905
+ },
906
+ "tags": [],
907
+ "id": "08da144a"
908
+ },
909
+ "outputs": [],
910
+ "source": [
911
+ "import re, requests\n",
912
+ "\n",
913
+ "class Answer:\n",
914
+ " def __init__(self):\n",
915
+ " self.best_answer = None\n",
916
+ " self.input_message = \"\"\n",
917
+ " self.best_count = 0\n",
918
+ " self.second_count = 0\n",
919
+ " self.answer_list = [] # ← was None, init as empty list\n",
920
+ " self.early_stop_flag = False\n",
921
+ " self.sorted_answers = []\n",
922
+ " self.valid_answers = [] # ← filtered list (no -1s), kept separate\n",
923
+ " self.sampling_param = {\n",
924
+ " \"tokens_to_generate\": 7000,\n",
925
+ " \"temperature\": 0.9, # 0.2,\n",
926
+ " \"top_p\": 0.95,\n",
927
+ " }\n",
928
+ " self.timeout = httpx.Timeout(\n",
929
+ " connect=60.0,\n",
930
+ " read=300.0,\n",
931
+ " write=60.0,\n",
932
+ " pool=120.0,\n",
933
+ " )\n",
934
+ "\n",
935
+ " def clean_messages(self, text):\n",
936
+ " cleaned = re.sub(r'<\\|[^|]*\\|>', '', text)\n",
937
+ " return cleaned.strip()\n",
938
+ "\n",
939
+ "\n",
940
+ " async def extract_answer(self, question, model_output):\n",
941
+ " answer = -1\n",
942
+ " confidence = -0.1\n",
943
+ " seed = secrets.randbits(32)\n",
944
+ " input_message = self.clean_messages(model_output)\n",
945
+ " rid = secrets.token_hex(8)\n",
946
+ " message = prompt_template.fill(\n",
947
+ " input_dict={\n",
948
+ " \"problem\": safe_concat(question,input_message,\"extract_answer\"),\n",
949
+ " \"system_prompt\": promptobj.get_dprompt(\"extract_answer\"),\n",
950
+ " },\n",
951
+ " chat_template_kwargs = chat_template_kwargs,\n",
952
+ " format_as_string=True\n",
953
+ " )\n",
954
+ " print(prompt_template)\n",
955
+ " print(\"textd was called\")\n",
956
+ " try:\n",
957
+ " data, completion_tokens = await server_obj.generate_response(\n",
958
+ " prompt=message,\n",
959
+ " random_seed=seed,\n",
960
+ " stream=True,\n",
961
+ " calling_function = \"extract_answer\",\n",
962
+ " extra_body={\"request_id\": rid, \"reasoning_effort\":\"medium\"},\n",
963
+ " timeout = self.timeout,\n",
964
+ " **self.sampling_param,\n",
965
+ " )\n",
966
+ "\n",
967
+ " if data is not None and isinstance(data, dict):\n",
968
+ " return data\n",
969
+ " else:\n",
970
+ " return {\"Answer\":-1, \"Confidence\":-0.1}\n",
971
+ "\n",
972
+ " except Exception as e:\n",
973
+ " print(f\"[extract_answer failed] {type(e).__name__}: {e}\")\n",
974
+ " return {\"Answer\":answer,\"Confidence\": confidence}\n"
975
+ ]
976
+ },
977
+ {
978
+ "cell_type": "markdown",
979
+ "id": "32920345",
980
+ "metadata": {
981
+ "papermill": {
982
+ "duration": 0.004946,
983
+ "end_time": "2026-04-12T05:06:00.141342+00:00",
984
+ "exception": false,
985
+ "start_time": "2026-04-12T05:06:00.136396+00:00",
986
+ "status": "completed"
987
+ },
988
+ "tags": [],
989
+ "id": "32920345"
990
+ },
991
+ "source": [
992
+ "# Inference"
993
+ ]
994
+ },
995
+ {
996
+ "cell_type": "code",
997
+ "execution_count": null,
998
+ "id": "88e12926",
999
+ "metadata": {
1000
+ "execution": {
1001
+ "iopub.execute_input": "2026-04-12T05:06:00.152565Z",
1002
+ "iopub.status.busy": "2026-04-12T05:06:00.151995Z",
1003
+ "iopub.status.idle": "2026-04-12T05:06:00.977284Z",
1004
+ "shell.execute_reply": "2026-04-12T05:06:00.976809Z"
1005
+ },
1006
+ "papermill": {
1007
+ "duration": 0.832359,
1008
+ "end_time": "2026-04-12T05:06:00.978691+00:00",
1009
+ "exception": false,
1010
+ "start_time": "2026-04-12T05:06:00.146332+00:00",
1011
+ "status": "completed"
1012
+ },
1013
+ "tags": [],
1014
+ "id": "88e12926"
1015
+ },
1016
+ "outputs": [],
1017
+ "source": [
1018
+ "# Below will change in kaggle\n",
1019
+ "#Instantiate Server Object\n",
1020
+ "server_obj = get_code_execution_model(server_type = 'vllm',\n",
1021
+ " model=model_path,\n",
1022
+ " base_url=\"http://127.0.0.1:5000/v1\",\n",
1023
+ " api_key='EMPTY',\n",
1024
+ " sandbox=sandbox,\n",
1025
+ " code_execution={\n",
1026
+ " 'max_code_output_characters': max_code_output_characters,\n",
1027
+ " 'code_execution_timeout': code_execution_timeout,\n",
1028
+ " 'max_code_executions': max_code_executions,\n",
1029
+ " })\n",
1030
+ "\n",
1031
+ "async def abort_request(request_ids: str | list[str]):\n",
1032
+ " \"\"\"Sequential best-effort server-side abort.\n",
1033
+ " Uses short timeouts so a slow/down server doesn't block.\n",
1034
+ " Silently ignores failures.\n",
1035
+ " \"\"\"\n",
1036
+ " if isinstance(request_ids, str):\n",
1037
+ " request_ids = [request_ids]\n",
1038
+ "\n",
1039
+ " timeout = httpx.Timeout(connect=1.0, read=2.0, write=1.0, pool=1.0)\n",
1040
+ "\n",
1041
+ " async with httpx.AsyncClient(timeout=timeout) as client:\n",
1042
+ " for rid in request_ids:\n",
1043
+ " try:\n",
1044
+ " await client.delete(f\"http://{host}:{port}/v1/requests/{rid}\")\n",
1045
+ " except Exception:\n",
1046
+ " # optionally log instead of silent pass\n",
1047
+ " pass\n",
1048
+ " await asyncio.sleep(0.05) # cooperative yield"
1049
+ ]
1050
+ },
1051
+ {
1052
+ "cell_type": "code",
1053
+ "execution_count": null,
1054
+ "id": "1134eef5",
1055
+ "metadata": {
1056
+ "execution": {
1057
+ "iopub.execute_input": "2026-04-12T05:06:00.990214Z",
1058
+ "iopub.status.busy": "2026-04-12T05:06:00.990023Z",
1059
+ "iopub.status.idle": "2026-04-12T05:06:01.002419Z",
1060
+ "shell.execute_reply": "2026-04-12T05:06:01.002010Z"
1061
+ },
1062
+ "papermill": {
1063
+ "duration": 0.019361,
1064
+ "end_time": "2026-04-12T05:06:01.003298+00:00",
1065
+ "exception": false,
1066
+ "start_time": "2026-04-12T05:06:00.983937+00:00",
1067
+ "status": "completed"
1068
+ },
1069
+ "tags": [],
1070
+ "id": "1134eef5"
1071
+ },
1072
+ "outputs": [],
1073
+ "source": [
1074
+ "class ClientClass:\n",
1075
+ " def __init__(self, prompt):\n",
1076
+ " global sampling_params\n",
1077
+ " self.thresh_hold = 3 # minimum completions before checking early stop\n",
1078
+ " self.system_prompt = prompt\n",
1079
+ " self.answer = {}\n",
1080
+ " self.randomseed_list = []\n",
1081
+ " self.num_done = 0\n",
1082
+ " self.sampling_param = copy.deepcopy(sampling_params)\n",
1083
+ " self.question = \"\"\n",
1084
+ " self.finished_generations = []\n",
1085
+ " self.final_answer = None\n",
1086
+ " self.early_stop_flag = False\n",
1087
+ " self.flattened_prompt_list = []\n",
1088
+ " self.list_of_questions = []\n",
1089
+ " self.answer_list = []\n",
1090
+ " self.request_ids = [] # per-task IDs for server-side abort\n",
1091
+ " self.tasks = []\n",
1092
+ " self.timeout = httpx.Timeout(\n",
1093
+ " connect=30.0,\n",
1094
+ " read= 500.0 ,\n",
1095
+ " write=30.0,\n",
1096
+ " pool=120.0,\n",
1097
+ " )\n",
1098
+ " self.answerobj = Answer()\n",
1099
+ "\n",
1100
+ " async def send_request_to_server(self):\n",
1101
+ " print(\"Request sent\")\n",
1102
+ " self.request_ids = [secrets.token_hex(8) for _ in self.list_of_questions]\n",
1103
+ " self.randomseed_list = [k for k in range(len(self.list_of_questions))]\n",
1104
+ " for prompt, seed, rid in zip(self.list_of_questions, self.randomseed_list, self.request_ids):\n",
1105
+ " task = asyncio.create_task(\n",
1106
+ " server_obj.generate_async(\n",
1107
+ " prompt=prompt,\n",
1108
+ " random_seed=seed,\n",
1109
+ " timeout=self.timeout,\n",
1110
+ " remove_stop_phrases=False,\n",
1111
+ " stream = True,\n",
1112
+ " extra_body={\"request_id\": rid,\"enable_thinking\":True,\"reasoning_effort\":\"high\"},\n",
1113
+ " **prompt_template.get_code_execution_args(),\n",
1114
+ " **self.sampling_param,\n",
1115
+ " )\n",
1116
+ " )\n",
1117
+ " self.tasks.append(task)\n",
1118
+ "\n",
1119
+ " try:\n",
1120
+ " processed = set()\n",
1121
+ " for completed in asyncio.as_completed(self.tasks):\n",
1122
+ " try:\n",
1123
+ " result = await completed\n",
1124
+ " self.num_done += 1\n",
1125
+ " processed.add(completed) # NOTE: as_completed() yields wrapper awaitables, not the Task objects stored in self.tasks\n",
1126
+ " self.finished_generations.append(result[\"generation\"])\n",
1127
+ " if result[\"answer\"] is not None:\n",
1128
+ " self.answer = json.loads(result[\"answer\"])\n",
1129
+ " print(\"The answer and confidence after json parsing\", self.answer)\n",
1130
+ " yield self.answer\n",
1131
+ " else:\n",
1132
+ " self.answer = await self.answerobj.extract_answer(self.question, result[\"generation\"])\n",
1133
+ " print(\"The answer and confidence after interaction with 2nd model\",self.answer)\n",
1134
+ " yield self.answer\n",
1135
+ " except GeneratorExit:\n",
1136
+ " return\n",
1137
+ " except Exception as e:\n",
1138
+ " traceback.print_exc()\n",
1139
+ " error_type = type(e).__name__\n",
1140
+ " print(f\"[ERROR] {error_type}\")\n",
1141
+ " traceback.print_exc()\n",
1142
+ " self.answer = {\n",
1143
+ " \"Answer\": -1,\n",
1144
+ " \"Confidence\": -0.1,\n",
1145
+ " }\n",
1146
+ " yield self.answer\n",
1147
+ "\n",
1148
+ " finally:\n",
1149
+ " # Cleanup; also runs when the Pipeline timeout handler closes this generator.\n",
1150
+ " for t in self.tasks:\n",
1151
+ " if t.done() and t not in processed:\n",
1152
+ " try:\n",
1153
+ " if not t.cancelled() and t.exception() is None:\n",
1154
+ " self.res = t.result()\n",
1155
+ "\n",
1156
+ " elif t.exception() is not None:\n",
1157
+ " # optional: handle failed tasks\n",
1158
+ " pass\n",
1159
+ " except Exception:\n",
1160
+ " pass\n",
1161
+ " elif not t.done():\n",
1162
+ " t.cancel()\n",
1163
+ " asyncio.create_task(abort_request(self.request_ids))\n",
1164
+ "\n",
1165
+ " # Fire server-side abort independently — survives parent cancellation\n",
1166
+ "\n",
1167
+ " def flatten_prompt_list(self):\n",
1168
+ " global max_batch_size\n",
1169
+ " self.flattened_prompt_list = [\n",
1170
+ " self.system_prompt\n",
1171
+ " # for system_prompt in self.prompts_list\n",
1172
+ " for _ in range(max_batch_size)\n",
1173
+ " ]\n",
1174
+ "\n",
1175
+ " def generate_question_copies(self, question):\n",
1176
+ " self.question = question\n",
1177
+ " self.list_of_questions = [\n",
1178
+ " prompt_template.fill(\n",
1179
+ " input_dict={\n",
1180
+ " \"problem\": question,\n",
1181
+ " \"system_prompt\": system_prompt,\n",
1182
+ " },\n",
1183
+ " chat_template_kwargs = chat_template_kwargs,\n",
1184
+ " format_as_string=True\n",
1185
+ " )\n",
1186
+ " for system_prompt in self.flattened_prompt_list\n",
1187
+ " ]\n",
1188
+ "\n",
1189
+ "\n",
1190
+ " async def predict_for_question(self, question):\n",
1191
+ " self.flatten_prompt_list()\n",
1192
+ " self.generate_question_copies(question)\n",
1193
+ "\n",
1194
+ " gen = self.send_request_to_server()\n",
1195
+ "\n",
1196
+ " try:\n",
1197
+ " async for answer in gen:\n",
1198
+ " yield answer\n",
1199
+ "\n",
1200
+ " except Exception as e:\n",
1201
+ " print(\"Error in predict_for_question:\", e)\n",
1202
+ " raise\n",
1203
+ "\n",
1204
+ " finally:\n",
1205
+ " try:\n",
1206
+ " await gen.aclose()\n",
1207
+ " except Exception:\n",
1208
+ " pass"
1209
+ ]
1210
+ },
1211
+ {
1212
+ "cell_type": "code",
1213
+ "execution_count": null,
1214
+ "id": "5c553f96",
1215
+ "metadata": {
1216
+ "execution": {
1217
+ "iopub.execute_input": "2026-04-12T05:06:01.014551Z",
1218
+ "iopub.status.busy": "2026-04-12T05:06:01.014122Z",
1219
+ "iopub.status.idle": "2026-04-12T05:06:01.019371Z",
1220
+ "shell.execute_reply": "2026-04-12T05:06:01.018984Z"
1221
+ },
1222
+ "papermill": {
1223
+ "duration": 0.011845,
1224
+ "end_time": "2026-04-12T05:06:01.020211+00:00",
1225
+ "exception": false,
1226
+ "start_time": "2026-04-12T05:06:01.008366+00:00",
1227
+ "status": "completed"
1228
+ },
1229
+ "tags": [],
1230
+ "id": "5c553f96"
1231
+ },
1232
+ "outputs": [],
1233
+ "source": [
1234
+ "import math\n",
1235
+ "\n",
1236
+ "class BufferBorrower:\n",
1237
+ " \"\"\"\n",
1238
+ " Dynamic buffer-time borrowing strategy for inference.\n",
1239
+ "\n",
1240
+ " Borrows from the global buffer according to time pressure (ideal\n",
1241
+ " pace versus available pace per remaining question).\n",
1242
+ "\n",
1243
+ " Parameters\n",
1244
+ " ----------\n",
1245
+ " total_questions : int\n",
1246
+ " Number of questions the total time is paced over (default 50).\n",
1247
+ " total_available_time : int\n",
1248
+ " Total time budget used to compute the ideal pace per question\n",
1249
+ " (default 15720); it is compared against remaining_time, so it\n",
1250
+ " should be expressed in the same unit as remaining_time.\n",
1251
+ " b_max : float\n",
1252
+ " Maximum fraction of buffer that can be borrowed (default 0.85).\n",
1253
+ " k : float\n",
1254
+ " Steepness of the sigmoid transition (default 6).\n",
1255
+ " threshold : float\n",
1256
+ " Midpoint of the sigmoid curve (default 0.4).\n",
1257
+ " \"\"\"\n",
1258
+ "\n",
1259
+ " def __init__(\n",
1260
+ " self,\n",
1261
+ " b_max: float = 0.85,\n",
1262
+ " k: float = 6.0,\n",
1263
+ " threshold: float = 0.4,\n",
1264
+ " total_questions: int = 50,\n",
1265
+ " total_available_time: int = 15720,\n",
1266
+ " ):\n",
1267
+ "\n",
1268
+ " self.b_max = b_max\n",
1269
+ " self.k = k\n",
1270
+ " self.threshold = threshold\n",
1271
+ " self.total_questions = total_questions\n",
1272
+ " self.total_available_time = total_available_time\n",
1273
+ "\n",
1274
+ " def compute_time_pressure(\n",
1275
+ " self,\n",
1276
+ " remaining_time: float,\n",
1277
+ " questions_completed: int,\n",
1278
+ " global_buffer: float = 0.0,\n",
1279
+ " ) -> float:\n",
1280
+ " remaining_q = max(1, self.total_questions - questions_completed)\n",
1281
+ " if remaining_time <= 0:\n",
1282
+ " return 1.5\n",
1283
+ " ideal_pace = self.total_available_time / self.total_questions\n",
1284
+ " available_pace = remaining_time / remaining_q\n",
1285
+ " pressure = ideal_pace / available_pace\n",
1286
+ " return max(0.3, min(1.5, pressure))\n",
1287
+ "\n",
1288
+ " def allocate_time(\n",
1289
+ " self,\n",
1290
+ " remaining_time: float,\n",
1291
+ " questions_completed: int,\n",
1292
+ " global_buffer: float = 0.0,\n",
1293
+ " allowed_time : float = 320,\n",
1294
+ " ) -> dict:\n",
1295
+ " \"\"\"\n",
1296
+ " Allocate effective inference and remaining buffer time.\n",
1297
+ "\n",
1298
+ " Parameters\n",
1299
+ " ----------\n",
1300
+ " allowed_time : float\n",
1301
+ " Base inference time budget.\n",
1302
+ " global_buffer : float\n",
1303
+ " global buffer time budget.\n",
1304
+ " remaining_time : float\n",
1305
+ " Time still available for all remaining questions; a value\n",
1306
+ " <= 0 yields the maximum pressure of 1.5.\n",
1307
+ " questions_completed : int\n",
1308
+ " Number of questions already finished; the remaining count is\n",
1309
+ " total_questions minus this value, floored at 1.\n",
1310
+ "\n",
1311
+ " Returns\n",
1312
+ " -------\n",
1313
+ " dict\n",
1314
+ " Keys: effective_inference, global_buffer, borrowed,\n",
1315
+ " borrow_fraction.\n",
1316
+ " \"\"\"\n",
1317
+ " pressure = self.compute_time_pressure(\n",
1318
+ " remaining_time,\n",
1319
+ " questions_completed,\n",
1320
+ " global_buffer\n",
1321
+ " )\n",
1322
+ " borrow_fraction = 1/pressure\n",
1323
+ " max_borrowable = 95\n",
1324
+ " print(\"borrow fraction\", borrow_fraction)\n",
1325
+ " borrowed = min(pressure * global_buffer, max_borrowable)\n",
1326
+ "\n",
1327
+ "\n",
1328
+ " return {\n",
1329
+ " \"effective_inference\": allowed_time + borrowed,\n",
1330
+ " \"global_buffer\": global_buffer - borrowed,\n",
1331
+ " \"borrowed\": borrowed,\n",
1332
+ " \"borrow_fraction\": borrow_fraction,\n",
1333
+ " }\n"
1334
+ ]
1335
+ },
1336
+ {
1337
+ "cell_type": "code",
1338
+ "execution_count": null,
1339
+ "id": "ac15f646",
1340
+ "metadata": {
1341
+ "execution": {
1342
+ "iopub.execute_input": "2026-04-12T05:06:01.030935Z",
1343
+ "iopub.status.busy": "2026-04-12T05:06:01.030736Z",
1344
+ "iopub.status.idle": "2026-04-12T05:06:01.034205Z",
1345
+ "shell.execute_reply": "2026-04-12T05:06:01.033802Z"
1346
+ },
1347
+ "papermill": {
1348
+ "duration": 0.009979,
1349
+ "end_time": "2026-04-12T05:06:01.035087+00:00",
1350
+ "exception": false,
1351
+ "start_time": "2026-04-12T05:06:01.025108+00:00",
1352
+ "status": "completed"
1353
+ },
1354
+ "tags": [],
1355
+ "id": "ac15f646"
1356
+ },
1357
+ "outputs": [],
1358
+ "source": [
1359
+ "class TimeBudget:\n",
1360
+ " def __init__(self, total_seconds):\n",
1361
+ " self.start = time.perf_counter()\n",
1362
+ " self.deadline = self.start + total_seconds\n",
1363
+ "\n",
1364
+ " @property\n",
1365
+ " def remaining(self):\n",
1366
+ " return max(0, self.deadline - time.perf_counter())\n",
1367
+ "\n",
1368
+ " @property\n",
1369
+ " def elapsed(self):\n",
1370
+ " return time.perf_counter() - self.start\n",
1371
+ "\n",
1372
+ " @property\n",
1373
+ " def expired(self):\n",
1374
+ " return self.remaining <= 0\n"
1375
+ ]
1376
+ },
1377
+ {
1378
+ "cell_type": "code",
1379
+ "execution_count": null,
1380
+ "id": "2a278fab",
1381
+ "metadata": {
1382
+ "execution": {
1383
+ "iopub.execute_input": "2026-04-12T05:06:01.045591Z",
1384
+ "iopub.status.busy": "2026-04-12T05:06:01.045424Z",
1385
+ "iopub.status.idle": "2026-04-12T05:06:01.053193Z",
1386
+ "shell.execute_reply": "2026-04-12T05:06:01.052801Z"
1387
+ },
1388
+ "papermill": {
1389
+ "duration": 0.014099,
1390
+ "end_time": "2026-04-12T05:06:01.054027+00:00",
1391
+ "exception": false,
1392
+ "start_time": "2026-04-12T05:06:01.039928+00:00",
1393
+ "status": "completed"
1394
+ },
1395
+ "tags": [],
1396
+ "id": "2a278fab"
1397
+ },
1398
+ "outputs": [],
1399
+ "source": [
1400
+ "class Pipeline:\n",
1401
+ " def __init__(self):\n",
1402
+ " self.budget_seconds = 0\n",
1403
+ " self.k = 1\n",
1404
+ " self.budget_seconds = 0\n",
1405
+ " async def get_prediction(self, problem_text):\n",
1406
+ " global global_buffer, i, borrower, max_batch_size,last_30, sampling_param\n",
1407
+ " budgetobj = None\n",
1408
+ " timeout = 60\n",
1409
+ " # Timeout at this level - see if needs to be implemented\n",
1410
+ " thresh_hold = 3\n",
1411
+ " num_done = 0\n",
1412
+ " max_generation_count = self.k*max_batch_size\n",
1413
+ " answer_list = []\n",
1414
+ " finalanswerobj = Result()\n",
1415
+ " print(\"Pipeline step 1\")\n",
1416
+ " deadline = 0\n",
1417
+ " allowed_time = 320\n",
1418
+ " self.budget_seconds = allowed_time\n",
1419
+ " if global_buffer> 0:\n",
1420
+ " result = borrower.allocate_time(\n",
1421
+ " remaining_time = get_global_remaining(),\n",
1422
+ " questions_completed = i,\n",
1423
+ " allowed_time = allowed_time,\n",
1424
+ " global_buffer = global_buffer\n",
1425
+ " )\n",
1426
+ "\n",
1427
+ " self.budget_seconds = result[\"effective_inference\"]\n",
1428
+ " global_buffer = result[\"global_buffer\"]\n",
1429
+ " print(f'borrowed={result[\"borrowed\"]:.0f}')\n",
1430
+ " print(f\"Budget: base={allowed_time:.0f}s \"\n",
1431
+ " f\"= {self.budget_seconds:.0f}s (global remaining: {get_global_remaining():.0f}s)\")\n",
1432
+ " budgetobj = TimeBudget(self.budget_seconds)\n",
1433
+ "\n",
1434
+ " clientobj = ClientClass(default_prompt)\n",
1435
+ " deadline = max(deadline, budgetobj.remaining)\n",
1436
+ " operation_start_time = time.perf_counter()\n",
1437
+ " print(\"Deadline is\", deadline)\n",
1438
+ " gen = clientobj.predict_for_question(problem_text)\n",
1439
+ " try:\n",
1440
+ " async with asyncio.timeout(deadline):\n",
1441
+ " async for answer in gen:\n",
1442
+ " answer_list.append(answer)\n",
1443
+ " print(\"Answer list on timeout is:-\")\n",
1444
+ " print(answer_list)\n",
1445
+ " num_done = len(answer_list)\n",
1446
+ " if num_done >= thresh_hold and num_done < max_generation_count:\n",
1447
+ " prediction, early_stop_flag = finalanswerobj.get_best_answer(answer_list, num_done, True)\n",
1448
+ " if early_stop_flag:\n",
1449
+ " return prediction\n",
1450
+ "\n",
1451
+ " elif num_done == max_generation_count:\n",
1452
+ " prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
1453
+ " return prediction\n",
1454
+ " else:\n",
1455
+ " continue\n",
1456
+ " except (TimeoutError, asyncio.TimeoutError):\n",
1457
+ " traceback.print_exc()\n",
1458
+ " prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
1459
+ " return prediction\n",
1460
+ "\n",
1461
+ " except Exception as e:\n",
1462
+ " traceback.print_exc()\n",
1463
+ " print(f\"UNEXPECTED ERROR: {type(e).__name__} {e}\")\n",
1464
+ " if answer_list:\n",
1465
+ " prediction, _ = finalanswerobj.get_best_answer(answer_list, num_done, False)\n",
1466
+ " return prediction\n",
1467
+ " return None\n",
1468
+ "\n",
1469
+ " finally:\n",
1470
+ " await gen.aclose()\n",
1471
+ " print(\"Operation duration\", time.perf_counter()-operation_start_time)\n",
1472
+ " if budgetobj.elapsed > self.budget_seconds:\n",
1473
+ " global_buffer -= (budgetobj.elapsed - self.budget_seconds)\n",
1474
+ " else:\n",
1475
+ " global_buffer += (self.budget_seconds - budgetobj.elapsed)\n",
1476
+ "\n"
1477
+ ]
1478
+ },
1479
+ {
1480
+ "cell_type": "code",
1481
+ "execution_count": null,
1482
+ "id": "e931c8db",
1483
+ "metadata": {
1484
+ "execution": {
1485
+ "iopub.execute_input": "2026-04-12T05:06:01.064603Z",
1486
+ "iopub.status.busy": "2026-04-12T05:06:01.064428Z",
1487
+ "iopub.status.idle": "2026-04-12T05:06:01.068387Z",
1488
+ "shell.execute_reply": "2026-04-12T05:06:01.068000Z"
1489
+ },
1490
+ "papermill": {
1491
+ "duration": 0.010469,
1492
+ "end_time": "2026-04-12T05:06:01.069242+00:00",
1493
+ "exception": false,
1494
+ "start_time": "2026-04-12T05:06:01.058773+00:00",
1495
+ "status": "completed"
1496
+ },
1497
+ "tags": [],
1498
+ "id": "e931c8db"
1499
+ },
1500
+ "outputs": [],
1501
+ "source": [
1502
+ "def predict(id_: pl.Series, problem: pl.Series) -> pl.DataFrame | pd.DataFrame:\n",
1503
+ " \"\"\"Make a prediction.\"\"\"\n",
1504
+ " global server_started, i\n",
1505
+ " start_pred_time = time.perf_counter()\n",
1506
+ " pipelineobj = Pipeline()\n",
1507
+ " if server_started is False:\n",
1508
+ " server_started = wait_for_server()\n",
1509
+ "\n",
1510
+ " id_ = id_.item(0)\n",
1511
+ " problem_text: str = problem.item(0)\n",
1512
+ "\n",
1513
+ " # BUG FIX: compare duration to duration (was comparing duration to absolute timestamp)\n",
1514
+ " if get_global_remaining() < 30:\n",
1515
+ " return pl.DataFrame({\"id\": id_, \"answer\": 29443})\n",
1516
+ " loop = asyncio.get_event_loop()\n",
1517
+ " prediction = loop.run_until_complete(pipelineobj.get_prediction(problem_text))\n",
1518
+ "\n",
1519
+ " # If prediction is still None after everything, use fallback\n",
1520
+ " if prediction is None:\n",
1521
+ " prediction = 29443\n",
1522
+ "\n",
1523
+ " i = i + 1\n",
1524
+ "\n",
1525
+ " print(\"Returned dataframe is \", pl.DataFrame({\"id\": id_, \"answer\": prediction}))\n",
1526
+ " return pl.DataFrame({\"id\": id_, \"answer\": prediction})\n"
1527
+ ]
1528
+ },
1529
+ {
1530
+ "cell_type": "code",
1531
+ "execution_count": null,
1532
+ "id": "a7394047",
1533
+ "metadata": {
1534
+ "execution": {
1535
+ "iopub.execute_input": "2026-04-12T05:06:01.079734Z",
1536
+ "iopub.status.busy": "2026-04-12T05:06:01.079548Z",
1537
+ "iopub.status.idle": "2026-04-12T05:17:09.993610Z",
1538
+ "shell.execute_reply": "2026-04-12T05:17:09.993024Z"
1539
+ },
1540
+ "papermill": {
1541
+ "duration": 668.920683,
1542
+ "end_time": "2026-04-12T05:17:09.994734+00:00",
1543
+ "exception": false,
1544
+ "start_time": "2026-04-12T05:06:01.074051+00:00",
1545
+ "status": "completed"
1546
+ },
1547
+ "tags": [],
1548
+ "id": "a7394047"
1549
+ },
1550
+ "outputs": [],
1551
+ "source": [
1552
+ "#Change the path of the csv file\n",
1553
+ "inference_server = kaggle_evaluation.aimo_3_inference_server.AIMO3InferenceServer(\n",
1554
+ " predict\n",
1555
+ ")\n",
1556
+ "borrower = BufferBorrower(total_questions = 50, total_available_time = get_global_remaining())\n",
1557
+ "if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):\n",
1558
+ " # You MUST call this within 15 minutes of the script starting. This is to\n",
1559
+ " # ensure a \"fast fail\" in case a bug prevents the inference server from starting.\n",
1560
+ " # Do anything that might take a long time (like model loading) in the predict\n",
1561
+ " # function, which has no time limit.\n",
1562
+ " try:\n",
1563
+ " start = time.perf_counter()\n",
1564
+ " inference_server.serve()\n",
1565
+ "\n",
1566
+ " finally:\n",
1567
+ " finish =time.perf_counter()\n",
1568
+ " time_taken_ = finish-start\n",
1569
+ " print(\"Time Taken\",time_taken_)\n",
1570
+ " print(i)\n",
1571
+ " with open(\"rerun.txt\", \"a\") as f:\n",
1572
+ " f.write(f\"Time taken: {time_taken_:.6f},Questions Completed: {i}\\n\")\n",
1573
+ "\n",
1574
+ "else:\n",
1575
+ "\n",
1576
+ " try:\n",
1577
+ " start = time.perf_counter()\n",
1578
+ " inference_server.run_local_gateway(\n",
1579
+ " ('/kaggle/input/competitions/ai-mathematical-olympiad-progress-prize-3/test.csv',))\n",
1580
+ "\n",
1581
+ " finally:\n",
1582
+ " finish = time.perf_counter()\n",
1583
+ " time_taken_ = finish - start\n",
1584
+ " print(\"Time Taken\",time_taken_ )\n",
1585
+ " with open(\"info.txt\", \"a\") as f:\n",
1586
+ " f.write(f\" Time taken: {time_taken_:.6f},Questions Completed: {i}\\n\")\n",
1587
+ " print(i)"
1588
+ ]
1589
+ },
1590
+ {
1591
+ "cell_type": "code",
1592
+ "execution_count": null,
1593
+ "id": "63d04159",
1594
+ "metadata": {
1595
+ "papermill": {
1596
+ "duration": 0.00535,
1597
+ "end_time": "2026-04-12T05:17:10.005304+00:00",
1598
+ "exception": false,
1599
+ "start_time": "2026-04-12T05:17:09.999954+00:00",
1600
+ "status": "completed"
1601
+ },
1602
+ "tags": [],
1603
+ "id": "63d04159"
1604
+ },
1605
+ "outputs": [],
1606
+ "source": []
1607
+ }
1608
+ ],
1609
+ "metadata": {
1610
+ "kaggle": {
1611
+ "accelerator": "nvidiaH100",
1612
+ "dataSources": [
1613
+ {
1614
+ "databundleVersionId": 14559231,
1615
+ "sourceId": 118448,
1616
+ "sourceType": "competition"
1617
+ },
1618
+ {
1619
+ "databundleVersionId": 16263450,
1620
+ "datasetId": 9820761,
1621
+ "sourceId": 15353457,
1622
+ "sourceType": "datasetVersion"
1623
+ },
1624
+ {
1625
+ "databundleVersionId": 16223586,
1626
+ "datasetId": 9797230,
1627
+ "sourceId": 15317833,
1628
+ "sourceType": "datasetVersion"
1629
+ },
1630
+ {
1631
+ "databundleVersionId": 16583370,
1632
+ "datasetId": 10017044,
1633
+ "sourceId": 15647597,
1634
+ "sourceType": "datasetVersion"
1635
+ },
1636
+ {
1637
+ "databundleVersionId": 15942658,
1638
+ "datasetId": 9642480,
1639
+ "sourceId": 15061462,
1640
+ "sourceType": "datasetVersion"
1641
+ },
1642
+ {
1643
+ "databundleVersionId": 15744165,
1644
+ "datasetId": 9520808,
1645
+ "sourceId": 14881112,
1646
+ "sourceType": "datasetVersion"
1647
+ },
1648
+ {
1649
+ "databundleVersionId": 16607303,
1650
+ "datasetId": 10033809,
1651
+ "sourceId": 15670043,
1652
+ "sourceType": "datasetVersion"
1653
+ },
1654
+ {
1655
+ "databundleVersionId": 15706375,
1656
+ "datasetId": 9495812,
1657
+ "sourceId": 14846606,
1658
+ "sourceType": "datasetVersion"
1659
+ },
1660
+ {
1661
+ "databundleVersionId": 16606906,
1662
+ "modelId": 641049,
1663
+ "modelInstanceId": 629147,
1664
+ "sourceId": 827437,
1665
+ "sourceType": "modelInstanceVersion"
1666
+ },
1667
+ {
1668
+ "sourceId": 303511002,
1669
+ "sourceType": "kernelVersion"
1670
+ },
1671
+ {
1672
+ "sourceId": 303518560,
1673
+ "sourceType": "kernelVersion"
1674
+ }
1675
+ ],
1676
+ "dockerImageVersionId": 31329,
1677
+ "isGpuEnabled": true,
1678
+ "isInternetEnabled": false,
1679
+ "language": "python",
1680
+ "sourceType": "notebook"
1681
+ },
1682
+ "kernelspec": {
1683
+ "display_name": "Python 3",
1684
+ "language": "python",
1685
+ "name": "python3"
1686
+ },
1687
+ "language_info": {
1688
+ "codemirror_mode": {
1689
+ "name": "ipython",
1690
+ "version": 3
1691
+ },
1692
+ "file_extension": ".py",
1693
+ "mimetype": "text/x-python",
1694
+ "name": "python",
1695
+ "nbconvert_exporter": "python",
1696
+ "pygments_lexer": "ipython3",
1697
+ "version": "3.12.12"
1698
+ },
1699
+ "papermill": {
1700
+ "default_parameters": {},
1701
+ "duration": 933.409462,
1702
+ "end_time": "2026-04-12T05:17:12.627119+00:00",
1703
+ "environment_variables": {},
1704
+ "exception": null,
1705
+ "input_path": "__notebook__.ipynb",
1706
+ "output_path": "__notebook__.ipynb",
1707
+ "parameters": {},
1708
+ "start_time": "2026-04-12T05:01:39.217657+00:00",
1709
+ "version": "2.7.0"
1710
+ },
1711
+ "colab": {
1712
+ "provenance": []
1713
+ }
1714
+ },
1715
+ "nbformat": 4,
1716
+ "nbformat_minor": 5
1717
+ }