{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: fineGrained).\n", "Your token has been saved to /home/ady/.cache/huggingface/token\n", "Login successful\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-15 15:22:07,933\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 09-15 15:22:08 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=26000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Instruct, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n", "INFO 09-15 15:22:09 model_runner.py:997] Starting to load model meta-llama/Meta-Llama-3.1-8B-Instruct...\n", "INFO 09-15 15:22:09 weight_utils.py:242] Using model weights format ['*.safetensors']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "# !pwd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Router Data Creation " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usernametextcategoryvalues
04b0d73e8-6ac2-4214-8c68-7d1fc36dd12dWhat’s is the P/E of CVS compared to competitorsCompanyAnalysis1.0
20f52d06c-09d2-4c99-8faa-9ec98808ae02How does Cigar-Butt Investing differ from othe...LLM1.0
3ed2ece58-3ce3-4cb9-a013-2e569bb14fcftell me a stock which will increase by 5 % in ...OutOfScope1.0
65ce399be-648c-4768-8e25-a73ca480d4e2Is MTCH revenue growing?CompanyAnalysis1.0
11f760f937-be70-4ab6-9ad8-3b69edae7a76What is Momentum investing?LLM1.0
...............
104218776314-d3b4-4fd4-a328-991b448c8a3cWhat is the intrinsic value of Brookfield corp...CompanyAnalysis1.0
104317e24d74-ce34-4c80-a4b0-00f91021e28aBuild me a stock portfolio before general elec...OutOfScope1.0
1064044fdc80-40d2-4322-8c5a-749bdb1c651dWhich sectors are most affected by economic cy...LLM1.0
10723c484112-d34d-459f-8e03-33d488088e4bwhat do you know about the sentiment of intel ...CompanyAnalysis1.0
1091f4fccd78-2c2a-4d97-9df6-d5504d87ee7eHiOutOfScope1.0
\n", "

200 rows × 4 columns

\n", "
" ], "text/plain": [ " username \\\n", "0 4b0d73e8-6ac2-4214-8c68-7d1fc36dd12d \n", "2 0f52d06c-09d2-4c99-8faa-9ec98808ae02 \n", "3 ed2ece58-3ce3-4cb9-a013-2e569bb14fcf \n", "6 5ce399be-648c-4768-8e25-a73ca480d4e2 \n", "11 f760f937-be70-4ab6-9ad8-3b69edae7a76 \n", "... ... \n", "1042 18776314-d3b4-4fd4-a328-991b448c8a3c \n", "1043 17e24d74-ce34-4c80-a4b0-00f91021e28a \n", "1064 044fdc80-40d2-4322-8c5a-749bdb1c651d \n", "1072 3c484112-d34d-459f-8e03-33d488088e4b \n", "1091 f4fccd78-2c2a-4d97-9df6-d5504d87ee7e \n", "\n", " text category \\\n", "0 What’s is the P/E of CVS compared to competitors CompanyAnalysis \n", "2 How does Cigar-Butt Investing differ from othe... LLM \n", "3 tell me a stock which will increase by 5 % in ... OutOfScope \n", "6 Is MTCH revenue growing? CompanyAnalysis \n", "11 What is Momentum investing? LLM \n", "... ... ... \n", "1042 What is the intrinsic value of Brookfield corp... CompanyAnalysis \n", "1043 Build me a stock portfolio before general elec... OutOfScope \n", "1064 Which sectors are most affected by economic cy... LLM \n", "1072 what do you know about the sentiment of intel ... CompanyAnalysis \n", "1091 Hi OutOfScope \n", "\n", " values \n", "0 1.0 \n", "2 1.0 \n", "3 1.0 \n", "6 1.0 \n", "11 1.0 \n", "... ... \n", "1042 1.0 \n", "1043 1.0 \n", "1064 1.0 \n", "1072 1.0 \n", "1091 1.0 \n", "\n", "[200 rows x 4 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "NUM_EXAMPLES=200\n", "\n", "router_tagged_df=pd.read_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Router_Tagging.csv')[1:].drop(['Others','NotesForOthers'],axis=1).fillna(0)\n", "router_tagged_df=router_tagged_df[router_tagged_df['AdyTagged']==1]\n", "router_tagged_df=router_tagged_df.drop(['AdyTagged'],axis=1).drop_duplicates()\n", "\n", "router_tagged_df = router_tagged_df.melt(id_vars=['username', 'question'], var_name='category', value_name='values')\n", "router_tagged_df=router_tagged_df.sample(frac=1).reset_index(drop=True)\n", "router_tagged_df=router_tagged_df.rename(columns={'question':'text'})\n", "router_tagged_df=router_tagged_df[router_tagged_df['values']==1]\n", "\n", "\n", "router_tagged_df_sample=router_tagged_df[0:NUM_EXAMPLES]\n", "router_tagged_df_sample\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 0%| | 0/600 [00:00._inner.._wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1546\u001b[0m, in \u001b[0;36mModelRunner.execute_model\u001b[0;34m(self, model_input, kv_caches, intermediate_tensors, num_steps)\u001b[0m\n\u001b[1;32m 1544\u001b[0m model_forward_start\u001b[38;5;241m.\u001b[39mrecord()\n\u001b[0;32m-> 1546\u001b[0m hidden_or_intermediate_states \u001b[38;5;241m=\u001b[39m 
\u001b[43mmodel_executable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1547\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1548\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_positions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1549\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1550\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1551\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1552\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mMultiModalInputs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmulti_modal_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1553\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1554\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mseqlen_agnostic_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1556\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_forward_time):\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:377\u001b[0m, in \u001b[0;36mMixtralForCausalLM.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 371\u001b[0m input_ids: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 375\u001b[0m intermediate_tensors: Optional[IntermediateTensors] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 376\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 377\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:297\u001b[0m, in \u001b[0;36mMixtralModel.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 296\u001b[0m layer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlayers[i]\n\u001b[0;32m--> 297\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_layer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresidual\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks 
\u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:244\u001b[0m, in \u001b[0;36mMixtralDecoderLayer.forward\u001b[0;34m(self, positions, hidden_states, kv_cache, attn_metadata, residual)\u001b[0m\n\u001b[1;32m 242\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(\n\u001b[1;32m 243\u001b[0m hidden_states, residual)\n\u001b[0;32m--> 244\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_sparse_moe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states, residual\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File 
\u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:101\u001b[0m, in \u001b[0;36mMixtralMoE.forward\u001b[0;34m(self, hidden_states)\u001b[0m\n\u001b[1;32m 100\u001b[0m router_logits, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgate(hidden_states)\n\u001b[0;32m--> 101\u001b[0m final_hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexperts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m final_hidden_states\u001b[38;5;241m.\u001b[39mview(orig_shape)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:469\u001b[0m, in \u001b[0;36mFusedMoE.forward\u001b[0;34m(self, hidden_states, router_logits)\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Matrix multiply.\u001b[39;00m\n\u001b[0;32m--> 469\u001b[0m final_hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_method\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 470\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 472\u001b[0m \u001b[43m \u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouter_logits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 473\u001b[0m \u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtop_k\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrenormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 475\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_grouped_topk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_grouped_topk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 476\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtopk_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 477\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_expert_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_expert_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_routing_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcustom_routing_function\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreduce_results \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtp_size \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:78\u001b[0m, in \u001b[0;36mUnquantizedFusedMoEMethod.apply\u001b[0;34m(self, layer, x, router_logits, top_k, renormalize, use_grouped_topk, topk_group, num_expert_group, custom_routing_function)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 67\u001b[0m layer: torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mModule,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 75\u001b[0m custom_routing_function: Optional[Callable] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 76\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m---> 78\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouter_logits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtop_k\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrenormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 83\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_grouped_topk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_grouped_topk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_expert_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_expert_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_routing_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_routing_function\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/custom_op.py:14\u001b[0m, in \u001b[0;36mCustomOp.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:114\u001b[0m, in \u001b[0;36mUnquantizedFusedMoEMethod.forward_cuda\u001b[0;34m(self, layer, x, use_grouped_topk, top_k, router_logits, renormalize, topk_group, num_expert_group, custom_routing_function)\u001b[0m\n\u001b[1;32m 104\u001b[0m topk_weights, topk_ids \u001b[38;5;241m=\u001b[39m FusedMoE\u001b[38;5;241m.\u001b[39mselect_experts(\n\u001b[1;32m 105\u001b[0m hidden_states\u001b[38;5;241m=\u001b[39mx,\n\u001b[1;32m 106\u001b[0m router_logits\u001b[38;5;241m=\u001b[39mrouter_logits,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 111\u001b[0m num_expert_group\u001b[38;5;241m=\u001b[39mnum_expert_group,\n\u001b[1;32m 112\u001b[0m custom_routing_function\u001b[38;5;241m=\u001b[39mcustom_routing_function)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfused_experts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[43m \u001b[49m\u001b[43mw1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mw13_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mw2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mw2_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mtopk_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/fused_moe.py:509\u001b[0m, in \u001b[0;36mfused_experts\u001b[0;34m(hidden_states, w1, w2, topk_weights, topk_ids, inplace, override_config, use_fp8_w8a8, use_int8_w8a16, w1_scale, w2_scale, a1_scale, a2_scale)\u001b[0m\n\u001b[1;32m 507\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config_func(M)\n\u001b[0;32m--> 509\u001b[0m intermediate_cache1 \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mempty\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mM\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtopk_ids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mN\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 510\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 511\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 512\u001b[0m intermediate_cache2 \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mempty((M \u001b[38;5;241m*\u001b[39m topk_ids\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m], N \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m),\n\u001b[1;32m 513\u001b[0m device\u001b[38;5;241m=\u001b[39mhidden_states\u001b[38;5;241m.\u001b[39mdevice,\n\u001b[1;32m 514\u001b[0m dtype\u001b[38;5;241m=\u001b[39mhidden_states\u001b[38;5;241m.\u001b[39mdtype)\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 546.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 229.62 MiB is free. Process 2320625 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 46.89 GiB memory in use. Of the allocated memory 46.10 GiB is allocated by PyTorch, with 32.07 MiB allocated in private pools (e.g., CUDA Graphs), and 81.19 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 26\u001b[0m\n\u001b[1;32m 23\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams(temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.3\u001b[39m, max_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1200\u001b[39m)\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# Generate outputs using the model\u001b[39;00m\n\u001b[0;32m---> 26\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mvllm_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mall_prompts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# Process the generated outputs and store them in a dictionary\u001b[39;00m\n\u001b[1;32m 29\u001b[0m synthetic_questions_dict \u001b[38;5;241m=\u001b[39m {}\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/utils.py:1036\u001b[0m, in \u001b[0;36mdeprecate_kwargs..wrapper..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1029\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1031\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1033\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1034\u001b[0m )\n\u001b[0;32m-> 1036\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:348\u001b[0m, in \u001b[0;36mLLM.generate\u001b[0;34m(self, prompts, sampling_params, prompt_token_ids, use_tqdm, lora_request, prompt_adapter_request, guided_options_request)\u001b[0m\n\u001b[1;32m 339\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams()\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_and_add_requests(\n\u001b[1;32m 342\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 343\u001b[0m params\u001b[38;5;241m=\u001b[39msampling_params,\n\u001b[1;32m 344\u001b[0m lora_request\u001b[38;5;241m=\u001b[39mlora_request,\n\u001b[1;32m 345\u001b[0m prompt_adapter_request\u001b[38;5;241m=\u001b[39mprompt_adapter_request,\n\u001b[1;32m 346\u001b[0m guided_options\u001b[38;5;241m=\u001b[39mguided_options_request)\n\u001b[0;32m--> 348\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_tqdm\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
LLMEngine\u001b[38;5;241m.\u001b[39mvalidate_outputs(outputs, RequestOutput)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:715\u001b[0m, in \u001b[0;36mLLM._run_engine\u001b[0;34m(self, use_tqdm)\u001b[0m\n\u001b[1;32m 713\u001b[0m total_out_toks \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 714\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine\u001b[38;5;241m.\u001b[39mhas_unfinished_requests():\n\u001b[0;32m--> 715\u001b[0m step_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 716\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m step_outputs:\n\u001b[1;32m 717\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output\u001b[38;5;241m.\u001b[39mfinished:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:1223\u001b[0m, in \u001b[0;36mLLMEngine.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1219\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_async_output_proc:\n\u001b[1;32m 1220\u001b[0m execute_model_req\u001b[38;5;241m.\u001b[39masync_callback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39masync_callbacks[\n\u001b[1;32m 1221\u001b[0m virtual_engine]\n\u001b[0;32m-> 1223\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1224\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1226\u001b[0m \u001b[38;5;66;03m# We need to do this here so that last step's sampled_token_ids can\u001b[39;00m\n\u001b[1;32m 1227\u001b[0m \u001b[38;5;66;03m# be passed to the next iteration for PP.\u001b[39;00m\n\u001b[1;32m 1228\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscheduler_config\u001b[38;5;241m.\u001b[39mis_multi_step:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py:78\u001b[0m, in \u001b[0;36mDistributedGPUExecutor.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparallel_worker_tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_workers(\n\u001b[1;32m 73\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart_worker_execution_loop\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 74\u001b[0m async_run_tensor_parallel_workers_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 75\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mextra_execute_model_run_workers_kwargs)\n\u001b[1;32m 77\u001b[0m \u001b[38;5;66;03m# Only the driver worker returns the sampling results.\u001b[39;00m\n\u001b[0;32m---> 78\u001b[0m driver_outputs \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_driver_execute_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m driver_outputs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m driver_outputs\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py:162\u001b[0m, in \u001b[0;36mMultiprocessingGPUExecutor._driver_execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_driver_execute_model\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m, execute_model_req: Optional[ExecuteModelRequest]\n\u001b[1;32m 156\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Optional[List[SamplerOutput]]:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run execute_model in the driver worker.\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \n\u001b[1;32m 159\u001b[0m \u001b[38;5;124;03m Passing None will cause the driver to stop the model execution\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;124;03m loop running in each of the remote workers.\u001b[39;00m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker_base.py:327\u001b[0m, in \u001b[0;36mLocalOrDistributedWorkerBase.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_execute_time):\n\u001b[1;32m 324\u001b[0m orig_model_execute_time \u001b[38;5;241m=\u001b[39m intermediate_tensors\u001b[38;5;241m.\u001b[39mtensors\u001b[38;5;241m.\u001b[39mget(\n\u001b[1;32m 325\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_execute_time\u001b[39m\u001b[38;5;124m\"\u001b[39m, torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[38;5;241m0\u001b[39m))\u001b[38;5;241m.\u001b[39mitem()\n\u001b[0;32m--> 327\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 329\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m[\u001b[49m\u001b[43mworker_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvirtual_engine\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 330\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 331\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 336\u001b[0m model_execute_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mperf_counter() \u001b[38;5;241m-\u001b[39m start_time\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;66;03m# output is IntermediateTensors\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner_base.py:125\u001b[0m, in \u001b[0;36mdump_input_when_exception.._inner.._wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (exclude_args \u001b[38;5;129;01mor\u001b[39;00m []):\n\u001b[1;32m 124\u001b[0m dumped_inputs[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marg_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m arg\n\u001b[0;32m--> 125\u001b[0m 
\u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdump\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdumped_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfilep\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(err)(\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError in model execution (input dumped to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfilename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m): \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(err)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:563\u001b[0m, in \u001b[0;36mLLMEngine.__reduce__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__reduce__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# This is to ensure that the LLMEngine is not referenced in\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;66;03m# the closure used to initialize Ray worker actors\u001b[39;00m\n\u001b[0;32m--> 563\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLLMEngine should not be pickled!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mRuntimeError\u001b[0m: LLMEngine should not be pickled!" ] } ], "source": [ "import pandas as pd\n", "import os\n", "\n", "pd.set_option('max_colwidth', 1400)\n", "\n", "# Select a subset of the data\n", "all_questions = router_tagged_df_sample['text'].values.tolist()\n", "\n", "synthetic_questions_dict = {}\n", "all_prompts = []\n", "original_prompts = [] # To keep track of the original prompts\n", "\n", "for prompt in all_questions:\n", " Instruction = f\"\"\"[INST] Imagine you are an expert & award winning stock market analyst who specializes in asking the right questions relevant to a thorough stock research. Given a user query, generate 20 further precise, expert and relevant questions with focus on identifying similar companies related to the given query. I want exactly 20 questions. Your questions should be all-encompassing and can be answered independently. Don't use words like 'GIVEN QUERY' or 'GIVEN INDUSTRY'. If an industry or sector is provided, try to respond with diverse questions with similar sectors pertinent to financial markets. If a ratio like MarketCap or ROI is asked in the question, your response should have a slightly different ratio based on your stock market understanding. The questions should be brief and less than 20 words. If there is a company name mentioned in the question, please use the same company name (with paraphrasing as needed) in related questions. 
QUESTION: {prompt} Start with similar questions directly without any note or disclaimer.[/INST]\"\"\"\n", " all_prompts.append(Instruction)\n", " original_prompts.append(prompt)\n", "\n", "# Repeat the prompts for multiple iterations\n", "all_prompts = all_prompts * 3\n", "original_prompts = original_prompts * 3\n", "\n", "# Define the sampling parameters\n", "sampling_params = SamplingParams(temperature=0.3, max_tokens=1200)\n", "\n", "# Generate outputs using the model\n", "outputs = vllm_model.generate(all_prompts, sampling_params)\n", "\n", "# Process the generated outputs and store them in a dictionary\n", "synthetic_questions_dict = {}\n", "for output, original_prompt in zip(outputs, original_prompts):\n", " generated_text = output.outputs[0].text.strip() if output.outputs else 'N/A'\n", " similar_questions = generated_text.split('\\n')\n", " if original_prompt in synthetic_questions_dict:\n", " synthetic_questions_dict[original_prompt].extend(similar_questions)\n", " else:\n", " synthetic_questions_dict[original_prompt] = similar_questions\n", "\n", "# Convert the dictionary to a DataFrame\n", "synthetic_questions_df = pd.DataFrame(list(synthetic_questions_dict.items()), columns=['text', 'Questions'])\n", "\n", "# Explode the list of questions into separate rows\n", "synthetic_questions_df = synthetic_questions_df.explode('Questions')\n", "\n", "# Merge with the original DataFrame to include the 'category' column\n", "synthetic_questions_df = synthetic_questions_df.merge(router_tagged_df_sample[['text', 'category']], on='text').sample(frac=1)\n", "\n", "# Remove duplicates\n", "synthetic_questions_df = synthetic_questions_df.drop_duplicates()\n", "\n", "# Ensure the directory exists\n", "output_directory = \"/home/ady/Stockbuzz.ai_Data/RouterNER/\"\n", "os.makedirs(output_directory, exist_ok=True)\n", "\n", "# Save the DataFrame to the specified CSV file location\n", "output_file_path = os.path.join(output_directory, \"synthetic_questions_df.csv\")\n", "synthetic_questions_df.to_csv(output_file_path, index=False)\n", "\n", "# Display the final DataFrame\n", "synthetic_questions_df\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " Router Model" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. 
Please use a different name to suppress this warning.\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "ename": "OutOfMemoryError", "evalue": "CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 6.38 MiB is free. Process 2320625 has 260.00 MiB memory in use. Process 2325452 has 260.00 MiB memory in use. Process 2328697 has 260.00 MiB memory in use. Process 2334035 has 260.00 MiB memory in use. Process 2398498 has 45.27 GiB memory in use. Including non-PyTorch memory, this process has 1.06 GiB memory in use. Of the allocated memory 810.76 MiB is allocated by PyTorch, and 11.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 99\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# Load the model\u001b[39;00m\n\u001b[1;32m 98\u001b[0m model \u001b[38;5;241m=\u001b[39m BertForSequenceClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(EMBEDDING_MODEL, num_labels\u001b[38;5;241m=\u001b[39mtrain_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlabel\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mnunique())\n\u001b[0;32m---> 99\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Move the model to the specified device\u001b[39;00m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# Define the compute_metrics function\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_metrics\u001b[39m(p):\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/modeling_utils.py:2883\u001b[0m, in \u001b[0;36mPreTrainedModel.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype_present_in_args:\n\u001b[1;32m 2879\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 2880\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou cannot cast a GPTQ model in a new `dtype`. Make sure to load the model using `from_pretrained` using the desired\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2881\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `dtype` by passing the correct `torch_dtype` argument.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2882\u001b[0m )\n\u001b[0;32m-> 2883\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1174\u001b[0m, in \u001b[0;36mModule.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1172\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m-> 1174\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m 
\u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", " \u001b[0;31m[... 
skipping similar frames: Module._apply at line 780 (3 times)]\u001b[0m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:805\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 801\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 802\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 803\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 805\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparam\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 806\u001b[0m p_should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# subclasses may have multiple child tensors so we need to use swap_tensors\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1160\u001b[0m, in \u001b[0;36mModule.to..convert\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 1153\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m convert_to_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m t\u001b[38;5;241m.\u001b[39mdim() \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m t\u001b[38;5;241m.\u001b[39mto(\n\u001b[1;32m 1155\u001b[0m device,\n\u001b[1;32m 1156\u001b[0m dtype \u001b[38;5;28;01mif\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_floating_point() \u001b[38;5;129;01mor\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_complex() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1157\u001b[0m non_blocking,\n\u001b[1;32m 1158\u001b[0m 
memory_format\u001b[38;5;241m=\u001b[39mconvert_to_format,\n\u001b[1;32m 1159\u001b[0m )\n\u001b[0;32m-> 1160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1162\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_floating_point\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_complex\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1163\u001b[0m \u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot copy out of meta tensor; no data!\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 6.38 MiB is free. Process 2320625 has 260.00 MiB memory in use. Process 2325452 has 260.00 MiB memory in use. Process 2328697 has 260.00 MiB memory in use. Process 2334035 has 260.00 MiB memory in use. Process 2398498 has 45.27 GiB memory in use. Including non-PyTorch memory, this process has 1.06 GiB memory in use. Of the allocated memory 810.76 MiB is allocated by PyTorch, and 11.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." 
] } ], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "from torch.utils.data import Dataset, DataLoader\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import precision_recall_fscore_support, accuracy_score\n", "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback\n", "import gc\n", "import random\n", "import json\n", "import os\n", "\n", "# Set device to GPU 0\n", "device = torch.device(\"cuda:0\")\n", "\n", "# Clear GPU memory\n", "torch.cuda.empty_cache()\n", "gc.collect()\n", "\n", "# Load the training DataFrame\n", "synthetic_questions_df = pd.read_csv(\"/home/ady/Stockbuzz.ai_Data/RouterNER/synthetic_questions_df.csv\")\n", "\n", "# Updating Training Data in same format\n", "question_dict = synthetic_questions_df.groupby('text')['Questions'].apply(list).to_dict()\n", "\n", "# Function to create 'Question_Concatenate'\n", "def create_question_concatenate(row):\n", " primary_key = row['text']\n", " possible_values = question_dict[primary_key]\n", " if len(possible_values) < 2:\n", " selected_items = possible_values\n", " else:\n", " selected_items = random.sample(possible_values, 2)\n", " return f\"{row['text']},{row['Questions']},{selected_items[0]},{selected_items[1]}\"\n", "\n", "# Apply the function to create the new column\n", "synthetic_questions_df['Questions'] = synthetic_questions_df.apply(create_question_concatenate, axis=1)\n", "synthetic_questions_df['label'] = synthetic_questions_df['category'].astype('category').cat.codes\n", "\n", "# synthetic_questions_df=synthetic_questions_df.sample(n=min(100,len(synthetic_questions_df)))\n", "\n", "# Create the category-to-label mapping\n", "label_to_category = dict(enumerate(synthetic_questions_df['category'].astype('category').cat.categories))\n", "category_to_label = {v: k for k, v in label_to_category.items()}\n", "\n", "# Save the mapping to a JSON file in the model's directory\n", "os.makedirs('Router_Models', exist_ok=True)\n", "with open('Router_Models/label_to_category.json', 'w') as f:\n", " json.dump(label_to_category, f)\n", "with open('Router_Models/category_to_label.json', 'w') as f:\n", " json.dump(category_to_label, f)\n", "\n", "# Split data based on unique 'text'\n", "unique_sql_all = synthetic_questions_df['text'].unique().tolist()\n", "train_size = int(0.8 * len(unique_sql_all))\n", "val_size = int(0.2 * len(unique_sql_all))\n", "\n", "train_sqls = unique_sql_all[:train_size]\n", "val_sqls = unique_sql_all[train_size:train_size + val_size]\n", "\n", "train_df = synthetic_questions_df[synthetic_questions_df['text'].isin(train_sqls)]\n", "val_df = synthetic_questions_df[synthetic_questions_df['text'].isin(val_sqls)]\n", "\n", "# Initialize the tokenizer\n", "EMBEDDING_MODEL = 'google-bert/bert-large-uncased'\n", "tokenizer = BertTokenizer.from_pretrained(EMBEDDING_MODEL)\n", "\n", "# Tokenize the datasets\n", "def tokenize_data(data):\n", " return tokenizer(data['Questions'].astype(str).tolist(), padding=True, truncation=True, max_length=128, return_tensors='pt')\n", "\n", "train_tokens = tokenize_data(train_df)\n", "val_tokens = tokenize_data(val_df)\n", "\n", "train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)\n", "val_labels = torch.tensor(val_df['label'].values, dtype=torch.long)\n", "\n", "# Create custom Dataset class\n", "class ArticleDataset(Dataset):\n", " def __init__(self, encodings, 
labels):\n", " self.encodings = encodings\n", " self.labels = labels\n", "\n", " def __getitem__(self, idx):\n", " item = {key: val[idx] for key, val in self.encodings.items()}\n", " item['labels'] = self.labels[idx]\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)\n", "\n", "train_dataset = ArticleDataset(train_tokens, train_labels)\n", "val_dataset = ArticleDataset(val_tokens, val_labels)\n", "\n", "# Load the model\n", "model = BertForSequenceClassification.from_pretrained(EMBEDDING_MODEL, num_labels=train_df['label'].nunique())\n", "model.to(device) # Move the model to the specified device\n", "\n", "# Define the compute_metrics function\n", "def compute_metrics(p):\n", " preds = np.argmax(p.predictions, axis=1)\n", " labels = p.label_ids\n", " precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n", " acc = accuracy_score(labels, preds)\n", " return {\n", " 'accuracy': acc,\n", " 'precision': precision,\n", " 'recall': recall,\n", " 'f1': f1,\n", " }\n", "\n", "# Define the training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " num_train_epochs=10,\n", " per_device_train_batch_size=64,\n", " per_device_eval_batch_size=64,\n", " gradient_accumulation_steps=4,\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", " no_cuda=False,\n", " dataloader_pin_memory=False,\n", ")\n", "\n", "# Define the trainer\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model to the specified directory\n", "model.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/')\n", "tokenizer.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Router Inference" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Top 3 companies by market cap?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 105.19 toks/s, output: 43.29 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: SQL \n", "\n", "Compare revenues of Apple vs Micrsoft\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 105.18 toks/s, output: 43.28 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Latest revenues of Amazon\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 103.46 toks/s, output: 43.29 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Latest revenues of Sun Pharma in India?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. 
speed input: 104.88 toks/s, output: 43.16 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: OutOfScope \n", "\n", "which cryptocurrency should I buy?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 104.20 toks/s, output: 43.24 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: OutOfScope \n", "\n", "What is Value investing?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 103.68 toks/s, output: 43.20 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: LLM \n", "\n", "What is AWS Bedrock?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 104.23 toks/s, output: 43.25 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Summarize latest earning call of Nvidia\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 104.94 toks/s, output: 43.18 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "How long will the a.i frenzy continue?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 105.36 toks/s, output: 43.18 toks/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: RAG \n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import torch\n", "from transformers import BertTokenizer, BertForSequenceClassification\n", "import openai\n", "import pandas as pd\n", "import json\n", "import numpy as np\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "# # Initialize OpenAI API client\n", "# OPEN_AI_API_KEY = \"EMPTY\"\n", "# OPENAI_API_BASE_MISTRAL = \"http://0.0.0.0:8000/v1\"\n", "# client_mistral = openai.OpenAI(api_key=OPEN_AI_API_KEY, base_url=OPENAI_API_BASE_MISTRAL)\n", "\n", "# TEXT_LLM_MODEL_MISTRAL = 'mistralai/Mistral-7B-Instruct-v0.2'\n", "\n", "# Load the model and tokenizer\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model_router = BertForSequenceClassification.from_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models')\n", "tokenizer_router = BertTokenizer.from_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER//Router_Models')\n", "model_router.to(device) # Move the model to the specified device\n", "\n", "# Load the category-to-label mapping\n", "with open('Router_Models/label_to_category.json', 'r') as f:\n", " label_to_category_router = json.load(f)\n", "\n", "\n", "\n", "def predict_router(text):\n", " model_router.eval()\n", " inputs = tokenizer_router(text, return_tensors='pt', padding=True, truncation=True, max_length=128).to(device)\n", " with torch.no_grad():\n", " outputs = model_router(**inputs)\n", " logits = outputs.logits\n", " predicted_class_id = logits.argmax().item()\n", " return label_to_category_router[str(predicted_class_id)]\n", "\n", "def reformulate_query(question):\n", " \"\"\"Reformulate the given question using OpenAI API.\"\"\"\n", " INSTRUCTION = f\"\"\"[INST] Imagine you are an expert & award-winning stock market analyst who specializes in asking the 
right questions relevant for thorough stock research. \n", " Given a user query, generate 3 further precise, expert, and relevant questions. \n", " I want exactly 3 questions. Don't use words like 'GIVEN QUERY' or 'GIVEN INDUSTRY'. \n", " If an industry or sector is provided, try to respond with diverse questions with similar sectors pertinent to financial markets. \n", " If a ratio like MarketCap or ROI is asked in the question, your response should have a slightly different ratio based on your stock market understanding. \n", " The questions should be brief and less than 20 words. If there is a company name mentioned in the question, please use the same company name (with paraphrasing as needed) in related questions.\n", " If the question is asking about non-US stocks, please be explicit in the country name based on your knowledge. \n", " Don't change the intent of related questions, e.g., data-seeking question vs. knowledge-seeking question. QUESTION: {question} Start with similar questions directly without any note or disclaimer.[/INST]\"\"\"\n", "\n", " # response = client_mistral.chat.completions.create(\n", " # model=TEXT_LLM_MODEL_MISTRAL,\n", " # messages=[{\"role\": \"user\", \"content\": INSTRUCTION}],\n", " # max_tokens=100,\n", " # temperature=0.2\n", " # )\n", " # content = response.choices[0].message.content\n", " # reformulated_query = question + \", Related Question:\" + content\n", " \n", " # return reformulated_query\n", "\n", " sampling_params = SamplingParams(temperature=0.2, max_tokens=100)\n", " outputs = vllm_model.generate([INSTRUCTION], sampling_params=sampling_params)\n", " \n", " generated_text = outputs[0].outputs[0].text.strip() # Get the generated text from the model\n", " reformulated_query = question + \", Related Question: \" + generated_text\n", " \n", " return reformulated_query\n", "\n", "# Example usage:\n", "questions = [\"Top 3 companies by market cap?\",\n", " \"Compare revenues of Apple vs Micrsoft\",\n", " \"Latest revenues of Amazon\",\n", " \"Latest revenues of Sun Pharma in India?\",\n", " \"which cryptocurrency should I buy?\",\n", " \"What is Value investing?\",\n", " \"What is AWS Bedrock?\",\"Summarize latest earning call of Nvidia\",\"How long will the a.i frenzy continue?\"]\n", "for text in questions:\n", " print(text)\n", " reformulated_query_llm = reformulate_query(text)\n", " category = predict_router(reformulated_query_llm)\n", " print(f\"Category: {category}\",'\\n')\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Subrouter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter - Data " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import login\n", "import os \n", "import torch\n", "torch.cuda.empty_cache()\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "# Use your Hugging Face token here\n", "login(\"\")\n", "\n", "from vllm import LLM, SamplingParams\n", "\n", "\n", "TEXT_LLM_MODEL_MISTRAL='meta-llama/Meta-Llama-3.1-8B-Instruct'\n", "\n", "\n", "\n", "vllm_model = LLM(\n", " model=TEXT_LLM_MODEL_MISTRAL,\n", " tensor_parallel_size=1, # Use both GPUs\n", " gpu_memory_utilization=0.95, # Lower memory utilization to prevent OOM\n", " max_model_len=26000)\n", " \n", " \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 
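"outputs": [], "source": [ "# A minimal sanity-check sketch before the full synthetic-data run in the next cell: it sends one\n", "# hand-written prompt through the vLLM model and parses the reply. It assumes `vllm_model` and\n", "# `SamplingParams` from the previous cell, and that the prompt asks for the question wrapped in\n", "# <question>...</question> tags, matching the convention used by the generation cell below.\n", "import re\n", "\n", "test_prompt = ('You are an expert stock market analyst. Generate 1 unique, insightful question '\n", " 'about the fair valuation of a single company. Provide the question in xml tags '\n", " 'i.e. <question>...</question> and nothing else.')\n", "\n", "test_params = SamplingParams(temperature=0.7, max_tokens=100)\n", "test_outputs = vllm_model.generate([test_prompt], test_params)\n", "\n", "raw_text = test_outputs[0].outputs[0].text\n", "match = re.search(r'<question>(.*?)</question>', raw_text, re.DOTALL)\n", "print('Raw completion:', raw_text)\n", "print('Parsed question:', match.group(1).strip() if match else 'NO MATCH - check the prompt/tags')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 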
"outputs": [], "source": [ "import pandas as pd\n", "import openai\n", "import aiohttp\n", "import asyncio\n", "import nest_asyncio\n", "import re\n", "pd.set_option('max_colwidth', 2400)\n", "\n", "\n", "categories = {\n", " 1: {\n", " \"name\": \"fair valuation\",\n", " \"prompt\": \"As a stock market analyst, generate a set of diverse and comprehensive questions that explore various aspects of determining the fair valuation of a company. These questions should cover methodologies such as discounted cash flow analysis. Ensure the questions address different valuation techniques or whether to buy or sell a stock or fair value of the company or anything about the stock price of a company.\"\n", " },\n", " 2: {\n", " \"name\": \"industry analysis\",\n", " \"prompt\": \"Generate a diverse range of questions aimed at conducting an in-depth analysis of different industries. The questions should cover industry trends, competitive landscape, regulatory impacts, and market opportunities. Include questions that focus on the analysis of industry such as electric vehicle, e-commerce, retail, technology etc.\"\n", " },\n", " 3: {\n", " \"name\": \"single company analysis\",\n", " \"prompt\": \"Create a set of detailed questions that focus on the comprehensive analysis of a single company. These questions should encompass the company's financial health, business model, management effectiveness, market position, and competitive advantages. Ensure the questions cover key financial statements, ratios, and strategic initiatives such as What do you think about Amazon stock.\"\n", " },\n", " 4: {\n", " \"name\": \"multiple companies analysis\",\n", " \"prompt\": \"Generate a variety of questions designed to compare and contrast multiple companies within the same industry or sector. 
These questions should necessarily have more than 1 company to compare and contrast on single or multiple parameters or same industry or different industry.\"\n", " }\n", "}\n", "\n", "# Define example questions for each category\n", "example_questions = {\n", " 1: [\n", " \"Will GME's stock price increase today?\",\n", " \"Abercrombie is trading at $190 per share; is it a good buy?\",\n", " \"What is the intrinsic value of Brookfield Corporation?\",\n", " \"Is Dynagas a good stock for the long run?\",\n", " \"Is Taiwan Semiconductor a good stock for the long run?\",\n", " \"Is it the right time to buy Nvidia stock?\",\n", " \"Why is Tesla considered overrated?\",\n", " \"What is the intrinsic value of Microsoft based on a discounted cash flow analysis?\",\n", " \"How does the price-to-earnings ratio of Amazon compare to its historical average?\",\n", " \"What is the fair value of Tesla based on comparable company analysis?\",\n", " \"How does the enterprise value to EBITDA ratio of Facebook compare to industry peers?\",\n", " \"What is the impact of interest rate changes on the valuation of Apple?\",\n", " \"How does the price-to-sales ratio of Google compare to other tech giants?\",\n", " \"What is the fair value of Netflix using a precedent transactions analysis?\",\n", " \"How do changes in tax policy affect the valuation of Amazon?\",\n", " \"What is the intrinsic value of Uber based on a discounted cash flow analysis?\",\n", " \"How does the price-to-book ratio of Microsoft compare to its historical average?\",\n", " \"What is the fair value of Facebook using a multi-stage growth model?\",\n", " \"How do changes in market conditions impact the valuation of Apple?\",\n", " \"What is the intrinsic value of Tesla based on a residual income model?\",\n", " \"How does the enterprise value to revenue ratio of Amazon compare to its peers?\",\n", " \"What is the fair value of Google using an excess earnings model?\",\n", " \"How do changes in cost of capital affect the valuation of Netflix?\",\n", " \"What is the intrinsic value of Uber based on a real options analysis?\",\n", " \"How does the price-to-cash-flow ratio of Microsoft compare to its historical average?\",\n", " \"What is the fair value of Facebook using an economic profit model?\",\n", " \"How do changes in growth expectations impact the valuation of Tesla?\"\n", " ],\n", " 2: [\n", " \"Will electric vehicles outperform the market this year?\",\n", " \"Analyze retail industry for me\",\n", " \"Is Artificial Intelligence a hype?\",\n", " \"What are the major trends in the e-commerce industry?\",\n", " \"How has the renewable energy sector evolved over the past decade?\",\n", " \"What are the key drivers of growth in the biotechnology industry?\",\n", " \"How has the competitive landscape changed in the automotive industry?\",\n", " \"What are the regulatory challenges facing the pharmaceutical industry?\",\n", " \"How has the rise of fintech impacted the financial services sector?\",\n", " \"What are the growth prospects for the telecommunications industry?\",\n", " \"How has the adoption of AI affected the technology sector?\",\n", " \"What are the key challenges facing the retail industry?\",\n", " \"How has the global supply chain crisis impacted the manufacturing industry?\",\n", " \"What are the major trends in the healthcare industry?\",\n", " \"How has the rise of streaming services affected the media industry?\",\n", " \"What are the key opportunities in the electric vehicle market?\",\n", " \"How has the COVID-19 
pandemic impacted the travel and tourism industry?\",\n", " \"What are the growth drivers for the cybersecurity industry?\",\n", " \"How has the real estate market evolved in recent years?\",\n", " \"What are the key factors influencing the oil and gas industry?\",\n", " \"How has the shift to remote work impacted the tech industry?\",\n", " \"What are the major challenges facing the agriculture industry?\",\n", " \"How has the rise of social media influenced the advertising industry?\"\n", " ],\n", " 3: [\n", " \"What are the key risks for HRBR?\",\n", " \"Analyse the financial health of CVS.\",\n", " \"What is the revenue trend for Arch Resources?\",\n", " \"Is Amazon a good buy?\",\n", " \"Tell me about Amazon's business model.\",\n", " \"Analyse the latest earnings call of Meta.\",\n", " \"How have Amazon's focus areas changed?\",\n", " \"What are the key risks investing in Nvidia?\",\n", " \"What are the key risks investing in Starbucks?\",\n", " \"What is Starbucks' revenue breakdown?\",\n", " \"What is Apple's revenue breakdown?\",\n", " \"What is the ROI of Starbucks?\",\n", " \"Provide an overview of BYD.\",\n", " \"What are the key financial ratios for evaluating Apple's performance?\",\n", " \"How has Tesla's market share evolved over the past five years?\",\n", " \"What are the major revenue streams for Alphabet (Google)?\",\n", " \"What is the impact of recent regulatory changes on Facebook's business?\",\n", " \"How does Microsoft's cloud business compare to its other segments?\",\n", " \"What are the growth prospects for Netflix in international markets?\",\n", " \"What are the key risks associated with investing in Uber?\",\n", " \"How has Starbucks' expansion strategy impacted its financial performance?\",\n", " \"What are the main factors driving Amazon's profitability?\",\n", " \"How has Disney's acquisition strategy affected its financial health?\",\n", " \"What are the key financial metrics for evaluating IBM's performance?\",\n", " \"How has Twitter's user growth impacted its revenue?\",\n", " \"What are the strategic initiatives of Coca-Cola to maintain market leadership?\",\n", " \"How has Pfizer's R&D spending influenced its product pipeline?\",\n", " \"What are the major cost components for Walmart?\",\n", " \"How does Apple's product diversification impact its revenue stability?\",\n", " \"What are the competitive advantages of Nvidia in the semiconductor industry?\",\n", " \"How has Tesla's focus on autonomous driving technology affected its market position?\",\n", " \"What are the financial implications of Google's recent acquisitions?\",\n", " \"How has Microsoft's focus on subscription services impacted its revenue?\"\n", " ],\n", " 4: [\n", " \"Compare the fundamentals of AMD and Intel over the last 5 years.\",\n", " \"How does Nvidia's profitability compare to other semiconductor companies?\",\n", " \"Costco Vs Walmart\",\n", " \"How do the financial metrics of Apple compare to those of Samsung?\",\n", " \"What are the key differences in business models between Amazon and Alibaba?\",\n", " \"How does the profitability of Google compare to Facebook?\",\n", " \"What are the growth prospects for Netflix compared to Disney+?\",\n", " \"How does the market share of Ford compare to General Motors?\",\n", " \"What are the key financial metrics for comparing Coca-Cola and PepsiCo?\",\n", " \"How does the revenue growth of Microsoft compare to IBM?\",\n", " \"What are the major differences in cost structures between Walmart and Target?\",\n", " \"How does the 
dividend policy of Verizon compare to AT&T?\",\n", " \"What are the key financial ratios for comparing ExxonMobil and Chevron?\",\n", " \"How does the R&D spending of Intel compare to AMD?\",\n", " \"What are the growth strategies of Uber compared to Lyft?\",\n", " \"How does the market capitalization of Tesla compare to Ford?\",\n", " \"What are the key differences in revenue streams between Netflix and Hulu?\",\n", " \"How does the financial performance of Visa compare to Mastercard?\",\n", " \"What are the competitive advantages of Nike compared to Adidas?\",\n", " \"How does the profitability of Procter & Gamble compare to Unilever?\",\n", " \"What are the key financial metrics for comparing JPMorgan Chase and Goldman Sachs?\",\n", " \"How does the market share of McDonald's compare to Burger King?\",\n", " \"What are the major differences in business strategies between Apple and Microsoft?\"\n", " ]\n", "}\n", "\n", "df_list=[]\n", "for category_id, details in categories.items():\n", " for example_question in example_questions[category_id]:\n", " for _ in range(100): # Repeat 100 times per example question\n", " prompt = f\"\"\" Category: {category_id}. You are an expert stock market analyst tasked with creating questions for training a model. Here are some example questions for reference:\\n\\n{example_question}\\n\\nNow, generate 1 unique, diverse, and insightful question specifically for {details['name']}. Ensure the question provides significant analytical depth and covers the key aspects outlined. Start generating the questions directly without any preamble. Please remember that you have to generate just one question and stop your response once you have provided the question. Provide the question in xml tags i.e. <question>...</question>\"\"\"\n", " df_list.append({'Category': details['name'], 'Prompt': prompt})\n", "\n", "# Create a DataFrame from the list\n", "full_df = pd.DataFrame(df_list)\n", "\n", "prompts=[]\n", "all_questions=full_df['Prompt'].values.tolist()\n", "\n", "for question in all_questions:\n", " prompts.append(question)\n", "\n", "sampling_params = SamplingParams(\n", " temperature=0.7,\n", " max_tokens=100\n", " )\n", "\n", "outputs = vllm_model.generate(prompts, sampling_params)\n", "\n", "llm_output=[]\n", "for output in outputs:\n", " try:\n", " generated_text = output.outputs[0].text\n", " match = re.search(r'<question>(.*?)</question>', generated_text, re.DOTALL)\n", " question_text = match.group(1).strip() # Strips any leading/trailing whitespace\n", "\n", "\n", " llm_output.append(question_text)\n", " except:\n", " llm_output.append('')\n", "\n", "\n", "full_df['Response'] = llm_output\n", "display(full_df)\n", "full_df=full_df[full_df['Response']!='']\n", "full_df.to_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Combined_Ady_Database.csv', index=False)\n", "# print(\"Synthetic questions dataset created and saved to 'Ady_Database4.csv'.\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter - Model" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. 
This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. 
Please use a different name to suppress this warning.\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n", "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33m19aditiyadav\u001b[0m (\u001b[33m19aditiyadav-stockbuzz\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.18.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.17.7" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/ady/StockBuzz_Experiments/AgentsTest/NER/wandb/run-20240915_202121-m4a5bz9k" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run ./results to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/19aditiyadav-stockbuzz/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/19aditiyadav-stockbuzz/huggingface/runs/m4a5bz9k" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [310/310 05:22, Epoch 5/5]\n", "
\n",
"      Epoch | Training Loss | Validation Loss | Accuracy | Precision | Recall   | F1\n",
"      1     | 0.948000      | 0.775363        | 0.886082 | 0.893503  | 0.886082 | 0.887385\n",
"      2     | 0.235800      | 0.169679        | 0.959794 | 0.960594  | 0.959794 | 0.959883\n",
"      3     | 0.152600      | 0.123418        | 0.964948 | 0.965072  | 0.964948 | 0.964948\n",
"      4     | 0.086800      | 0.145899        | 0.963918 | 0.964666  | 0.963918 | 0.963899\n",
"      5     | 0.076000      | 0.161085        | 0.962371 | 0.963215  | 0.962371 | 0.962410\n",
"
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "('SubRouter_Models/tokenizer_config.json',\n", " 'SubRouter_Models/special_tokens_map.json',\n", " 'SubRouter_Models/vocab.txt',\n", " 'SubRouter_Models/added_tokens.json')" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n", "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback\n", "import gc\n", "import os\n", "import json\n", "\n", "import os\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", "\n", "\n", "\n", "# Set device to GPU 0 if available\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "# Clear GPU memory\n", "torch.cuda.empty_cache()\n", "gc.collect()\n", "\n", "# Load the generated CSV\n", "df = pd.read_csv(\"/home/ady/Stockbuzz.ai_Data/RouterNER/Combined_Ady_Database.csv\")\n", "df = df.sample(n=min(10000,len(df)))\n", "\n", "df.rename(columns={'Category': 'category', 'Response': 'Questions'}, inplace=True)\n", "\n", "# Create the category-to-label mapping\n", "df['label'] = df['category'].astype('category').cat.codes\n", "label_to_category = dict(enumerate(df['category'].astype('category').cat.categories))\n", "category_to_label = {v: k for k, v in label_to_category.items()}\n", "\n", "# Save the mapping to a JSON file in the model's directory\n", "os.makedirs('SubRouter_Models', exist_ok=True)\n", "with open('SubRouter_Models/label_to_category.json', 'w') as f:\n", " json.dump(label_to_category, f)\n", "with open('SubRouter_Models/category_to_label.json', 'w') as f:\n", " json.dump(category_to_label, f)\n", "\n", "# Split the dataset into training and validation sets based on unique 'Questions'\n", "unique_questions = df['Questions'].unique().tolist()\n", "train_size = int(0.8 * len(unique_questions))\n", "val_size = int(0.2 * len(unique_questions))\n", "\n", "train_questions = unique_questions[:train_size]\n", "val_questions = unique_questions[train_size:train_size + val_size]\n", "\n", "train_df = df[df['Questions'].isin(train_questions)]\n", "val_df = df[df['Questions'].isin(val_questions)]\n", "\n", "# Initialize the BERT tokenizer and model\n", "checkpoint=\"google-bert/bert-large-uncased\"\n", "tokenizer_subrouter = BertTokenizer.from_pretrained(checkpoint)\n", "model_subrouter = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(df['category'].unique()))\n", "model_subrouter.to(device) # Move the model to the specified device\n", "\n", "# Tokenize the datasets\n", "def tokenize_data(data):\n", " return tokenizer_subrouter(data['Questions'].astype(str).tolist(), padding=True, truncation=True, max_length=64, return_tensors='pt')\n", "\n", "train_tokens = tokenize_data(train_df)\n", "val_tokens = tokenize_data(val_df)\n", "\n", "train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)\n", "val_labels = torch.tensor(val_df['label'].values, dtype=torch.long)\n", "\n", "# Create custom Dataset class\n", "class ArticleDataset(torch.utils.data.Dataset):\n", " def __init__(self, encodings, labels):\n", " self.encodings = encodings\n", " self.labels = labels\n", "\n", " def __getitem__(self, idx):\n", " item = {key: val[idx] for key, val in 
self.encodings.items()}\n", " item['labels'] = self.labels[idx]\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)\n", "\n", "train_dataset = ArticleDataset(train_tokens, train_labels)\n", "val_dataset = ArticleDataset(val_tokens, val_labels)\n", "\n", "# Define the compute_metrics function for evaluation\n", "def compute_metrics(pred):\n", " labels = pred.label_ids\n", " preds = pred.predictions.argmax(-1)\n", " precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n", " acc = accuracy_score(labels, preds)\n", " return {\n", " 'accuracy': acc,\n", " 'precision': precision,\n", " 'recall': recall,\n", " 'f1': f1,\n", " }\n", "\n", "# Define the training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " num_train_epochs=5,\n", " per_device_train_batch_size=128,\n", " per_device_eval_batch_size=128,\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", " no_cuda=False,\n", " dataloader_pin_memory=False,\n", ")\n", "\n", "# Define the trainer\n", "trainer = Trainer(\n", " model=model_subrouter,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model and tokenizer to the specified directory\n", "model_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n", "tokenizer_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter Inference" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Question: Compare depreciation ttm of amazon and intol\n", "Predicted Category: multiple companies analysis\n", "\n", "Question: What is the current market trend for technology stocks?\n", "Predicted Category: industry analysis\n", "\n", "Question: How did shakti pumps perform in the last quarter?\n", "Predicted Category: single company analysis\n", "\n", "Question: Compare Amazon vs Apple in terms of revenues\n", "Predicted Category: multiple companies analysis\n", "\n", "Question: What is the reasonable price of Nvidia?\n", "Predicted Category: fair valuation\n", "\n", "Question: Compare Apple vs Micrsoft\n", "Predicted Category: multiple companies analysis\n", "\n" ] } ], "source": [ "import torch\n", "from transformers import BertTokenizer, BertForSequenceClassification\n", "import json\n", "\n", "# Load the model and tokenizer\n", "device = torch.device(\"cuda\")\n", "model_subrouter = BertForSequenceClassification.from_pretrained('./SubRouter_Models')\n", "tokenizer_subrouter = BertTokenizer.from_pretrained('./SubRouter_Models')\n", "model_subrouter.to(device) # Move the model to the specified device\n", "\n", "# Load the category-to-label mapping\n", "with open('SubRouter_Models/label_to_category.json', 'r') as f:\n", " label_to_category_subrouter = json.load(f)\n", "\n", "def predict_subrouter(text):\n", " model_subrouter.eval()\n", " inputs = tokenizer_subrouter(text, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)\n", " with torch.no_grad():\n", " 
outputs = model_subrouter(**inputs)\n", " logits = outputs.logits\n", " predicted_class_id = logits.argmax().item()\n", " return label_to_category_subrouter[str(predicted_class_id)]\n", "\n", "# Example usage:\n", "questions = [\"Compare depreciation ttm of amazon and intol\",\n", " \"What is the current market trend for technology stocks?\",\n", " \"How did shakti pumps perform in the last quarter?\",\n", " \"Compare Amazon vs Apple in terms of revenues\",\n", " \"What is the reasonable price of Nvidia?\",\n", " \"Compare Apple vs Micrsoft\"]\n", "for text in questions:\n", " category = predict_subrouter(text)\n", " print(f\"Question: {text}\\nPredicted Category: {category}\\n\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: fineGrained).\n", "Your token has been saved to /home/ady/.cache/huggingface/token\n", "Login successful\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-15 21:07:20,332\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 09-15 21:07:20 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=26000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Instruct, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n", "INFO 09-15 21:07:21 model_runner.py:997] Starting to load model meta-llama/Meta-Llama-3.1-8B-Instruct...\n", "INFO 09-15 21:07:22 weight_utils.py:242] Using model weights format ['*.safetensors']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00 18\u001b[0m vllm_model \u001b[38;5;241m=\u001b[39m \u001b[43mLLM\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTEXT_LLM_MODEL_MISTRAL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensor_parallel_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Use both GPUs\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mgpu_memory_utilization\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.95\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Lower memory utilization to prevent OOM\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_model_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m26000\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:178\u001b[0m, in \u001b[0;36mLLM.__init__\u001b[0;34m(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, **kwargs)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThere is no need to pass vision-related arguments anymore.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 156\u001b[0m engine_args \u001b[38;5;241m=\u001b[39m EngineArgs(\n\u001b[1;32m 157\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 158\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 177\u001b[0m )\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine \u001b[38;5;241m=\u001b[39m \u001b[43mLLMEngine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_engine_args\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mUsageContext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mLLM_CLASS\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter \u001b[38;5;241m=\u001b[39m Counter()\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:550\u001b[0m, in \u001b[0;36mLLMEngine.from_engine_args\u001b[0;34m(cls, engine_args, usage_context, stat_loggers)\u001b[0m\n\u001b[1;32m 548\u001b[0m executor_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_executor_cls(engine_config)\n\u001b[1;32m 549\u001b[0m \u001b[38;5;66;03m# Create the LLM engine.\u001b[39;00m\n\u001b[0;32m--> 550\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mengine_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutor_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecutor_class\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_stats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mengine_args\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdisable_log_stats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musage_context\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mstat_loggers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstat_loggers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 558\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m engine\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:331\u001b[0m, in \u001b[0;36mLLMEngine.__init__\u001b[0;34m(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers, input_registry)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_executor \u001b[38;5;241m=\u001b[39m executor_class(\n\u001b[1;32m 318\u001b[0m model_config\u001b[38;5;241m=\u001b[39mmodel_config,\n\u001b[1;32m 319\u001b[0m cache_config\u001b[38;5;241m=\u001b[39mcache_config,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 327\u001b[0m observability_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config,\n\u001b[1;32m 328\u001b[0m )\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39membedding_mode:\n\u001b[0;32m--> 331\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_kv_caches\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;66;03m# If usage stat is enabled, collect relevant info.\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_usage_stats_enabled():\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:473\u001b[0m, in \u001b[0;36mLLMEngine._initialize_kv_caches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_gpu_blocks \u001b[38;5;241m=\u001b[39m num_gpu_blocks\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[0;32m--> 473\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/gpu_executor.py:125\u001b[0m, in \u001b[0;36mGPUExecutor.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;66;03m# NOTE: This is logged in the executor because there can be >1 worker\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# with other executors. 
We could log in the engine level, but work\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# remains to abstract away the device for non-GPU configurations.\u001b[39;00m\n\u001b[1;32m 122\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m# GPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m, # CPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, num_gpu_blocks,\n\u001b[1;32m 123\u001b[0m num_cpu_blocks)\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:266\u001b[0m, in \u001b[0;36mWorker.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_cache_engine()\n\u001b[0;32m--> 266\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_warm_up_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:282\u001b[0m, in \u001b[0;36mWorker._warm_up_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_warm_up_model\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39menforce_eager:\n\u001b[0;32m--> 282\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgpu_cache\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;66;03m# Reset the seed to ensure that the random state is not affected by\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;66;03m# the model initialization and profiling.\u001b[39;00m\n\u001b[1;32m 285\u001b[0m set_random_seed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mseed)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1422\u001b[0m, in \u001b[0;36mGPUModelRunnerBase.capture_model\u001b[0;34m(self, kv_caches)\u001b[0m\n\u001b[1;32m 1415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_seqlen_agnostic:\n\u001b[1;32m 1416\u001b[0m \u001b[38;5;66;03m# Only used by Mamba-based models CUDA graph atm (Jamba)\u001b[39;00m\n\u001b[1;32m 1417\u001b[0m capture_inputs\u001b[38;5;241m.\u001b[39mupdate({\n\u001b[1;32m 1418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseqlen_agnostic_capture_inputs\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1419\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mget_seqlen_agnostic_capture_inputs(\n\u001b[1;32m 1420\u001b[0m batch_size)\n\u001b[1;32m 1421\u001b[0m })\n\u001b[0;32m-> 1422\u001b[0m \u001b[43mgraph_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcapture_inputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_memory_pool \u001b[38;5;241m=\u001b[39m graph_runner\u001b[38;5;241m.\u001b[39mgraph\u001b[38;5;241m.\u001b[39mpool()\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_runners[virtual_engine][batch_size] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1425\u001b[0m graph_runner)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1665\u001b[0m, in \u001b[0;36mCUDAGraphRunner.capture\u001b[0;34m(self, input_ids, positions, hidden_or_intermediate_states, intermediate_inputs, kv_caches, attn_metadata, memory_pool, stream, **kwargs)\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[38;5;66;03m# Run the model a few times without capturing the graph.\u001b[39;00m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;66;03m# This is to make sure that the captured graph does not include the\u001b[39;00m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;66;03m# kernel launches for initial benchmarking (e.g., Triton autotune).\u001b[39;00m\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;66;03m# Note one iteration is not enough for torch.jit.script\u001b[39;00m\n\u001b[1;32m 1664\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(_NUM_WARMUP_ITERS):\n\u001b[0;32m-> 1665\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1666\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1667\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1668\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1669\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1670\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1671\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1672\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1673\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39msynchronize()\n\u001b[1;32m 1675\u001b[0m \u001b[38;5;66;03m# Capture the graph.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:448\u001b[0m, in 
\u001b[0;36mLlamaForCausalLM.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 442\u001b[0m input_ids: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 446\u001b[0m intermediate_tensors: Optional[IntermediateTensors] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 447\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Union[torch\u001b[38;5;241m.\u001b[39mTensor, IntermediateTensors]:\n\u001b[0;32m--> 448\u001b[0m model_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_output\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:329\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors, inputs_embeds)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstart_layer, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mend_layer):\n\u001b[1;32m 328\u001b[0m layer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlayers[i]\n\u001b[0;32m--> 329\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 330\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 331\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_layer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mresidual\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m IntermediateTensors({\n\u001b[1;32m 339\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhidden_states\u001b[39m\u001b[38;5;124m\"\u001b[39m: hidden_states,\n\u001b[1;32m 340\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresidual\u001b[39m\u001b[38;5;124m\"\u001b[39m: residual\n\u001b[1;32m 341\u001b[0m })\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:261\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, positions, hidden_states, kv_cache, attn_metadata, residual)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n\u001b[1;32m 259\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(\n\u001b[1;32m 260\u001b[0m hidden_states, residual)\n\u001b[0;32m--> 261\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states, residual\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:87\u001b[0m, in \u001b[0;36mLlamaMLP.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m---> 87\u001b[0m gate_up, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgate_up_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mact_fn(gate_up)\n\u001b[1;32m 89\u001b[0m x, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdown_proj(x)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:367\u001b[0m, in \u001b[0;36mColumnParallelLinear.forward\u001b[0;34m(self, input_)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;66;03m# Matrix multiply.\u001b[39;00m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquant_method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 367\u001b[0m output_parallel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_method\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgather_output:\n\u001b[1;32m 369\u001b[0m \u001b[38;5;66;03m# All-gather across the partitions.\u001b[39;00m\n\u001b[1;32m 370\u001b[0m output \u001b[38;5;241m=\u001b[39m tensor_model_parallel_all_gather(output_parallel)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:135\u001b[0m, in \u001b[0;36mUnquantizedLinearMethod.apply\u001b[0;34m(self, layer, x, bias)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 131\u001b[0m layer: 
torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mModule,\n\u001b[1;32m 132\u001b[0m x: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[1;32m 133\u001b[0m bias: Optional[torch\u001b[38;5;241m.\u001b[39mTensor] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.43 GiB of which 16.69 MiB is free. Including non-PyTorch memory, this process has 47.38 GiB memory in use. Of the allocated memory 46.94 GiB is allocated by PyTorch, with 33.31 MiB allocated in private pools (e.g., CUDA Graphs), and 41.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] } ], "source": [ "from huggingface_hub import login\n", "import os \n", "import torch\n", "torch.cuda.empty_cache()\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "# Use your Hugging Face token here\n", "login(\"\")\n", "\n", "from vllm import LLM, SamplingParams\n", "\n", "\n", "TEXT_LLM_MODEL_MISTRAL='meta-llama/Meta-Llama-3.1-8B-Instruct'\n", "\n", "\n", "\n", "vllm_model = LLM(\n", " model=TEXT_LLM_MODEL_MISTRAL,\n", " tensor_parallel_size=1, # Use both GPUs\n", " gpu_memory_utilization=0.95, # Lower memory utilization to prevent OOM\n", " max_model_len=26000)\n", " \n", " \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Data Creation" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Collecting matplotlib\n", " Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", "Collecting contourpy>=1.0.1 (from matplotlib)\n", " Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.4 kB)\n", "Collecting cycler>=0.10 (from matplotlib)\n", " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", "Collecting fonttools>=4.22.0 (from matplotlib)\n", " Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (162 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.6/162.6 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)\n", " Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)\n", "Requirement already satisfied: numpy>=1.23 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (24.1)\n", 
"Requirement already satisfied: pillow>=8 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (10.4.0)\n", "Collecting pyparsing>=2.3.1 (from matplotlib)\n", " Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)\n", "Requirement already satisfied: python-dateutil>=2.7 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (2.9.0)\n", "Requirement already satisfied: six>=1.5 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n", "Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.3/8.3 MB\u001b[0m \u001b[31m99.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.2/323.2 kB\u001b[0m \u001b[31m134.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", "Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m100.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m122.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyparsing-3.1.4-py3-none-any.whl (104 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.1/104.1 kB\u001b[0m \u001b[31m547.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: pyparsing, kiwisolver, fonttools, cycler, contourpy, matplotlib\n", "Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.53.1 kiwisolver-1.4.7 matplotlib-3.9.2 pyparsing-3.1.4\n" ] } ], "source": [ "!pip install matplotlib\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Collecting seqeval\n", " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m249.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.26.4)\n", "Requirement already satisfied: scikit-learn>=0.21.3 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.5.1)\n", "Requirement already satisfied: scipy>=1.6.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.14.1)\n", "Requirement already satisfied: joblib>=1.2.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n", "Building wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=3ad02021d3334b570a31b1504d9d0c4da3569f9ff09c9efb6da8b2b37276ac87\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-a6hygjhv/wheels/bc/92/f0/243288f899c2eacdfa8c5f9aede4c71a9bad0ee26a01dc5ead\n", "Successfully built seqeval\n", "Installing collected packages: seqeval\n", "Successfully installed seqeval-1.2.2\n" ] } ], "source": [ "!pip install seqeval\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import aiohttp\n", "import asyncio\n", "import nest_asyncio\n", "import random\n", "import re\n", "pd.set_option('max_colwidth', 2400)\n", "\n", "# # Apply the nest_asyncio patch\n", "# nest_asyncio.apply()\n", "\n", "# # Set environment variables\n", "# OPEN_AI_API_KEY = \"EMPTY\" # Replace with your actual API key\n", "# OPENAI_API_BASE_MIXTRAL = os.getenv(\"OPENAI_API_BASE_MIXTRAL\", \"http://0.0.0.0:8004/v1\")\n", "# TEXT_LLM_MODEL_MIXTRAL = 'cognitivecomputations/dolphin-2.8-mistral-7b-v02'\n", "\n", "# List of metrics to be included in questions\n", "metrics = [\n", " \"Return on Capital Employed\",\n", " \"Debt ratio\",\n", " \"Debt-Equity ratio\",\n", " \"Interest Coverage\",\n", " \"Dividend Payout ratio\",\n", " \"PE Ratio\",\n", " \"PE to Growth ratio\",\n", " \"Dividend yield\",\n", " \"Enterprise Value Multiple\",\n", " \"Dividend yield TTM\",\n", " \"Dividend yield percentage TTM\",\n", " \"PE ratio TTM\",\n", " \"PEF ratio TTM\",\n", " \"Current ratio TTM\",\n", " \"Quick ratio TTM\",\n", " \"Gross Profit Margin TTM\",\n", " \"Operating Profit Margin TTM\",\n", " \"Net-Profit Margin TTM\",\n", " \"Return on Assets TTM\",\n", " \"Return on Equity TTM\",\n", " \"Return on Capital Employed TTM\",\n", " \"Debt ratio TTM\",\n", " \"Debt-Equity ratio TTM\",\n", " \"Interest Coverage TTM\",\n", " \"PE ratio TTM\",\n", " \"PEG ratio TTM\",\n", " \"Price to Sales ratio TTM\",\n", " \"Price to Fair-Value TTM\",\n", " \"Dividend per share TTM\",\n", " \"Revenue growth\",\n", " \"EBITDA growth\",\n", " \"Net-Income growth\",\n", " \"Growth in Net-Income-ratio\",\n", " \"EPS growth\",\n", " \"Total-Debt growth\",\n", " \"NetDebt growth\",\n", " \"Altman Z Score\",\n", " \"Piotroski Score\",\n", " \"Working Capital\",\n", " \"Total Assets\",\n", " \"Retained Earnings\",\n", " \"EBIT\",\n", " \"Total Liabilities\"\n", "]\n", "\n", "# List of entities (companies) to be included in questions\n", "entities = [\n", " \"Amazon\", \"Apple\", \"Microsoft\", \"Google\", \"Facebook\", \"Nvidia\",\n", " 
\"Meta\", \"Tesla\", \"Broadcom\", \"ASML\", \"Costco\", \"Cisco\", \"Intel\", \"Adobe\"\n", "]\n", "\n", "# New list of example questions\n", "example_questions = [\n", " \"What’s is the P/E of CVS compared to competitors\",\n", " \"Is abercrombie still a good buy\",\n", " \"Analyse CVS for me\",\n", " \"How does Arch Resources' return on equity compare to its competitors in the coal mining sector?\",\n", " \"What is the intrinsic value of Brookfield corporation\",\n", " \"What is the P/E ratio of Brookfield Corporation?\",\n", " \"Which companies, similar to Amazon, have a significant presence in both e-commerce and cloud computing services?\",\n", " \"Which tech-driven businesses, akin to Amazon, provide extensive e-commerce solutions and operate successful cloud computing divisions?\",\n", " \"Is Dynagas good stock for long run\",\n", " \"Is Taiwan semiconductor good stock in the long run\",\n", " \"Will Electric Vehicles outperform the market this year?\",\n", " \"Analyse latest earning call of Meta\",\n", " \"How does QSR.TO's market capitalization compare to its peers in the restaurant sector?\",\n", " \"How have Amazon's focus areas changed?\",\n", " \"What are the key risks investing in nvidia\",\n", " \"What is the ROI of Starbucks?\",\n", " \"What do you think of ticker LNG?\",\n", " \"Analyze CDW\",\n", " \"Is it a right time to buy Nvidia stock?\",\n", " \"Levi stock\",\n", " \"Tell me about TSLA but talk like a pirate\",\n", " \"What about BYD?\",\n", " \"What is Starbucks' return on equity (ROE) compared to its competitors in the food and beverage sector?\",\n", " \"What is McDonald's ROE compared to Starbucks in the food and beverage sector?\",\n", " \"How does Starbucks' ROE compare to Dunkin' Donuts in the coffee and bakery sector?\",\n", " \"Why the company DoubleVerify Holdings shares fell?\",\n", " \"What are the latest news for AMD?\",\n", " \"What is the latest close price of NVDA?\",\n", " \"Why is Tesla so overrated?\",\n", " \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n", " \"What are the consensus EPS forecasts for PYPL for the next 3 financial years?\",\n", " \"What is PYPL EPS in FY24, FY25 and FY26?\",\n", " \"Give me a chart of coke's P/E ratio for the last 5 years\",\n", " \"What is the stock price of Apple?\",\n", " \"Compare AMD and intel fundamentals for the last 5 years\",\n", " \"Compare the stocks of AMD and intel based on their fundamentals for the last 5 years\",\n", " \"What is the average return on equity (ROE) for AMD and Intel over the last 5 years?\",\n", " \"Do a sentiment comparison between intel and amd for the last 2 years\",\n", " \"How the options are looking for both amd and intel for the last 10 trading days\",\n", " \"How has the price-to-earnings ratio (P/E) for AMD and Intel compared over the last 10 trading days?\",\n", " \"Why intel price dropped during 2021 and 2022?\",\n", " \"How is ASTS?\",\n", " \"Is MTCH revenue growing?\",\n", " \"Tell me about amazon\",\n", " \"Will GME go up or go down today?\",\n", " \"What are the key factors influencing GME's stock price movement?\",\n", " \"Berkshire b vs voo\",\n", " \"Compare Berkshire b with voo for past 20 years performance and report me\",\n", " \"What are your thoughts on moneylion?\",\n", " \"Akon\",\n", " \"Predictions on NVIDIA for tomorrow\",\n", " \"Value of oracle\",\n", " \"Tell me about OKLA\",\n", " \"Is ASTS a good investment?\",\n", " \"Is smci good? 
why or why not\",\n", " \"Should I buy accenture?\",\n", " \"What is Accenture's return on equity (ROE) compared to its competitors in the IT services industry?\",\n", " \"Should I invest in Intel intc?\",\n", " \"What do you think about the ticker ENPH?\",\n", " \"What is status of apple stocks?\",\n", " \"Should I buy more apple?\",\n", " \"What is the average growth rate of HD revenue?\",\n", " \"What is the average growth rate of Home Depot revenue?\",\n", " \"I have been tracking the share price of MSI (Motorola Solutions). What are the predictions for the next 6 months?\",\n", " \"How's uber doing lately?\",\n", " \"Show me the revenue, income, price graphs for Uber\",\n", " \"Is ANGELONE a good stock to invest in?\",\n", " \"Nvidia metrics and comparables\",\n", " \"How does Nvidia's profitability compare to other semiconductor companies?\",\n", " \"How does Nvidia's market capitalization compare to other semiconductor companies?\",\n", " \"What do you think about hapag loyd?\",\n", " \"Amazon latest results\",\n", " \"Can you give me important informations for cloudlfare?\",\n", " \"What is MSFT last 3 year revenue?\",\n", " \"What is the revenue growth rate of MSFT in the last 3 years?\",\n", " \"What is asts?\",\n", " \"Future of polestar\",\n", " \"What are some key metrics of nvidia over the past few years?\",\n", " \"What is Nvidia's ROE and ROA over the past few years?\",\n", " \"Is ford a good investment right now?\",\n", " \"What are the prospects of growth for paypal?\",\n", " \"Would Costco be a good investment right now?\",\n", " \"Costco vs wallmart\",\n", " \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n", " \"How would you value costco?\",\n", " \"What is Costco's price-to-earnings ratio compared to other discount retailers?\",\n", " \"How is Ocugen performing?\",\n", " \"I meant OCGN\",\n", " \"What was Apple’s revenue last year?\",\n", " \"What is the outlook for ARM stock?\",\n", " \"What is the outlook for ARM stock?\",\n", " \"Outlook for giig stock in 2025\",\n", " \"Is SLS a good buy right now?\",\n", " \"Is AMD a good buy right now?\",\n", " \"What is happening with Tesla stock?\",\n", " \"What is the short interest on WIRE?\",\n", " \"What is the market cap of WIRE?\",\n", " \"Is AMD good to be invested now or should I vest my RSU now?\",\n", " \"What are the growth prospects for Intel in the semiconductor industry?\",\n", " \"NVDA\",\n", " \"Analyze the stock shw\",\n", " \"Zts\",\n", " \"Analyze zts\",\n", " \"Tell me about the outlook of The Qt Company, QTCOM\",\n", " \"What is the average revenue growth rate of The Qt Company in the past five years compared to its competitors in the software development industry?\",\n", " \"META vs MSFT vs NVDA?\",\n", " \"NVDA\",\n", " \"How does NVDA's market capitalization compare to its peers in the semiconductor industry?\",\n", " \"What is JPM’s net interest margin over the last 10 years?\",\n", " \"How was Q1 of Smart Sand compared to Q4 2023?\",\n", " \"Price target for NVDA?\",\n", " \"What is the price target for NVIDIA's competitors like AMD and Intel?\",\n", " \"Is lulu a good buy?\",\n", " \"Is nvidia worth buying?\",\n", " \"What is the market cap of Nvidia compared to its competitors?\",\n", " \"What stock trend is predicted for nvda?\",\n", " \"Should I buy calls on NVDA for December 2024?\",\n", " \"What do you think about SQQQ?\",\n", " \"What are the main competitors to crocs inc.?\",\n", " \"What is the long term debt of walgreen boots 
alliance (WBA)?\",\n", " \"What is the long term debt of CVS Health (CVS)?\",\n", " \"What is future of DAX40?\",\n", " \"What's the intrinsic value of nvidia?\",\n", " \"What is the market capitalization of Nvidia's competitors?\",\n", " \"What is the intrinsic value of Amazon?\",\n", " \"What is the intrinsic value of Walmart?\",\n", " \"What is Linde dcf value?\",\n", " \"What can you tell me about linde?\",\n", " \"What can you tell me about evvty (evolution ab)?\",\n", " \"Give me the evvty important numbers\",\n", " \"EVVTY Analysis\",\n", " \"How does EVVTY's ROE compare to its peers in the industry?\",\n", " \"What is quarterly sales and earnings of fiserv for last 2 years?\",\n", " \"What is the average quarterly sales and earnings growth rate of Fiserv and its competitors in the financial technology industry over the last 2 years?\",\n", " \"Calculate the price action movement for the last 30 days for google\",\n", " \"What are the top competitors of Albemarle in the chemical industry?\",\n", " \"What is nvidia?\",\n", " \"What is Nvidia's primary business?\",\n", " \"Current risk in Ulta stock?\",\n", " \"What is the current risk in Ulta's competitors' stocks?\",\n", " \"What is the forecast of Nano one materials?\"\n", "]\n", "\n", "\n", "df_list=[]\n", "for metric in metrics:\n", " for entity in entities:\n", " for example_question in example_questions:\n", " for _ in range(3): # Repeat 10 times\n", " prompt = f\"\"\" You are an expert stock market analyst tasked with creating questions for training a model. Here are some example questions for reference:\\n\\n{example_questions}\\n\\nNow, generate 1 unique, diverse, and insightful question specifically for the metric '{metric}' and the entity '{entity}'. Ensure the question provides significant analytical depth and covers the key aspects outlined. The question should be less than 20 words and must contain both the metric '{metric}' and the entity '{entity}' exactly as specified. Use the metric '{metric}' and entity '{entity}' exactly as provided in your response, this is very very important; specifically check for this condition every single time before generating a question - like if metric is Operating Cash Flow - it should be used as it is. 
Start generating the questions directly without any preamble. Provide question in xml tags i.e <question>....</question>\"\"\"\n", "                df_list.append({'Example Question': example_question,'Metric' :metric,\"Entity\": entity,'Prompt': prompt})\n", "\n", "\n", "# Create a DataFrame from the list\n", "full_df = pd.DataFrame(df_list).sample(n=25000)\n", "full_df\n", "\n", "prompts=[]\n", "all_questions=full_df['Prompt'].values.tolist()\n", "\n", "for question in all_questions:\n", "    prompts.append(question)\n", "\n", "sampling_params = SamplingParams(\n", "    temperature=0.7,\n", "    max_tokens=100\n", "    )\n", "\n", "outputs = vllm_model.generate(prompts, sampling_params)\n", "\n", "llm_output=[]\n", "for output in outputs:\n", "    try:\n", "        generated_text = output.outputs[0].text\n", "        match = re.search(r'<question>(.*?)</question>', generated_text, re.DOTALL)\n", "        question_text = match.group(1).strip() # Strips any leading/trailing whitespace\n", "\n", "\n", "        llm_output.append(question_text)\n", "    except:\n", "        llm_output.append('')\n", "\n", "\n", "full_df['Response'] =llm_output\n", "display(full_df)\n", "full_df=full_df[full_df['Response']!='']\n", "\n", "# Define the directory path and file name\n", "directory = '/home/ady/Stockbuzz.ai_Data/RouterNER/'\n", "file_path = os.path.join(directory, 'Ady_User_Database_NER_Entities_Metrics_2.csv')\n", "\n", "# Check if the directory exists, and create it if not\n", "if not os.path.exists(directory):\n", "    os.makedirs(directory)\n", "\n", "# Now save the DataFrame to the CSV file\n", "full_df.to_csv(file_path, index=False)\n", "\n", "print(f\"Synthetic questions dataset created and saved to {file_path}.\")\n", "# full_df.to_csv('/home/ady/Stocks_Guidance/StockBuzz_v2_June_24/StockBuzz_v2_June_24/AgentsTest/NER/Ady_User_Database_NER_Entities_Metrics_2.csv', index=False)# print(\"Synthetic questions dataset created and saved to 'Ady_Database4.csv'.\")\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Model" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Size of train dataset: 19891\n", "Size of validation dataset: 4975\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", "  warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Training Label Counts:\n", "Counter({'O': 322182, 'I-METRIC': 31143, 'B-ENTITY': 19886, 'B-METRIC': 16255})\n", "\n", "Validation Label Counts:\n", "Counter({'O': 80848, 'I-METRIC': 7635, 'B-ENTITY': 4970, 'B-METRIC': 4037})\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Map: 100%|██████████| 19891/19891 [00:01<00:00, 13101.13 examples/s]\n", "Map: 100%|██████████| 4975/4975 [00:00<00:00, 7058.05 examples/s] \n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", "  warnings.warn(\n", "torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED. 
In order to use Torch DDP, launch your script with `python -m torch.distributed.launch\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [ 350/1550 09:57 < 34:19, 0.58 it/s, Epoch 2/10]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation LossOverall PrecisionOverall RecallOverall F1Overall AccuracyEntity PrecisionEntity RecallEntity F1Entity AccuracyMetric PrecisionMetric RecallMetric F1Metric Accuracy
101.7677001.8369320.2945790.1240760.1746070.7730010.3480450.2017760.2554540.0000000.0074850.0009310.0016570.005126
201.6596001.6805010.2933200.1242190.1745270.7730390.3483080.2020100.2557120.0000000.0072360.0009310.0016500.006003
301.5145001.4298450.2841660.1242190.1728700.7734520.3481680.2020100.2556750.0000000.0060100.0009310.0016130.013421
401.2871001.1524680.2197710.1265890.1606450.7992280.3478790.2021260.2556900.0002340.0152260.0068930.0094900.217846
501.0478000.8885660.2511840.1523660.1896760.8100150.3571860.2093700.2639950.0144880.1231170.0624070.0828290.299521
600.7812000.6001870.7098950.6037190.6525160.8847830.9592850.9221870.9403710.8387660.1791770.0964980.1254390.358333
700.5419000.3645090.7648580.6801180.7200030.9067690.9933250.9910040.9921630.9887840.3196770.1846130.2340580.431712
800.3522000.2449810.8277780.8327710.8302670.9430340.9952040.9940410.9946220.9936910.6895790.5830850.6318760.707830
900.2465000.1534160.8540470.8772890.8655120.9628990.9987140.9981310.9984220.9980140.7519210.6928090.7211560.829770
1000.1680000.1037610.9012170.9360950.9183250.9761520.9991820.9990650.9991240.9989480.8918550.8464980.8685850.920213
1100.1155000.0644400.9197060.9614420.9401110.9851920.9998830.9998830.9998830.9997660.9355210.9135620.9244110.960478
1200.0736000.0360660.9520670.9755150.9636490.9916131.0000001.0000001.0000000.9998830.9558690.9482120.9520250.976799
1300.0514000.0263810.9654010.9857110.9754500.9937431.0000001.0000001.0000000.9998830.9805130.9748510.9776740.987860
1400.0402000.0223010.9720170.9926760.9822380.9951151.0000001.0000001.0000000.9998830.9928960.9893820.9911360.994537
1500.0313000.0166310.9771120.9931790.9850800.9960851.0000001.0000001.0000000.9998830.9923310.9882640.9902930.993997
1600.0259000.0151020.9798620.9956920.9877130.9966221.0000001.0000001.0000000.9998830.9973850.9947840.9960830.997167
1700.0215000.0101090.9903610.9959790.9931620.9981671.0000000.9998830.9999420.9997660.9962670.9942250.9952450.996965
1800.0154000.0068900.9950550.9969120.9959830.9989351.0000000.9998830.9999420.9997660.9964530.9944110.9954310.996965
1900.0130000.0070910.9945570.9972000.9958770.9989351.0000000.9998830.9999420.9997660.9994400.9973920.9984150.998516
2000.0096000.0055770.9965590.9981330.9973450.9992511.0000000.9998830.9999420.9997660.9990670.9972060.9981350.998584
2100.0094000.0046250.9972030.9984200.9978110.9993571.0000000.9998830.9999420.9997660.9981340.9966470.9973900.998314
2200.0095000.0050230.9970610.9987790.9979200.9993191.0000000.9998830.9999420.9997660.9992540.9977650.9985090.998921
2300.0065000.0037570.9977040.9984200.9980620.9994631.0000000.9998830.9999420.9997660.9981340.9966470.9973900.998381
2400.0051000.0039360.9977770.9991380.9984570.9994531.0000001.0000001.0000000.9998830.9998130.9985100.9991610.999123
2500.0056000.0031050.9984930.9987790.9986360.9995681.0000000.9998830.9999420.9997660.9994400.9975780.9985080.998651
2600.0057000.0031940.9981350.9991380.9986360.9995111.0000000.9998830.9999420.9997660.9998130.9985100.9991610.999123
2700.0045000.0028460.9984930.9992100.9988520.9996161.0000000.9998830.9999420.9997660.9998140.9986960.9992540.999258
2800.0050000.0026170.9985650.9992100.9988870.9996551.0000001.0000001.0000000.9998830.9994410.9985100.9989750.999258
2900.0034000.0025720.9982780.9991380.9987080.9996261.0000001.0000001.0000000.9998830.9992540.9985100.9988820.999326
3000.0051000.0023820.9982780.9992100.9987440.9996641.0000001.0000001.0000000.9998830.9990680.9986960.9988820.999393
3100.0056000.0025590.9981350.9993540.9987440.9996551.0000001.0000001.0000000.9998830.9990690.9990690.9990690.999528
3200.0050000.0020000.9989230.9991380.9990310.9996741.0000000.9998830.9999420.9997660.9994410.9983230.9988820.998988
3300.0036000.0019960.9990670.9991380.9991020.9996641.0000000.9998830.9999420.9997660.9996270.9981370.9988810.998853
3400.0043000.0022590.9983500.9990670.9987080.9996351.0000001.0000001.0000000.9998830.9990680.9981370.9986020.999056
3500.0029000.0020710.9989230.9992100.9990670.9996741.0000001.0000001.0000000.9998830.9996270.9983230.9989750.998988

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "import re\n", "import numpy as np\n", "import torch\n", "import pandas as pd\n", "import time\n", "from datasets import Dataset, load_metric\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification, TrainerCallback, EarlyStoppingCallback\n", "from transformers.trainer_utils import IntervalStrategy\n", "import matplotlib.pyplot as plt\n", "from collections import Counter\n", "import torch.nn as nn\n", "\n", "pd.set_option('display.width', 1000)\n", "\n", "# Define the label mapping\n", "labels = [\"O\", \"B-ENTITY\", \"I-ENTITY\", \"B-METRIC\", \"I-METRIC\"]\n", "label2id = {label: i for i, label in enumerate(labels)}\n", "id2label = {i: label for i, label in enumerate(labels)}\n", "\n", "# Load the dataset\n", "df_input_consolidated = pd.read_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Ady_User_Database_NER_Entities_Metrics_2.csv')\n", "\n", "df_input_consolidated = df_input_consolidated[df_input_consolidated['Response'] != 'Error: ']\n", "df_input_consolidated['Response'] = df_input_consolidated['Response'].str.lower()\n", "df_input_consolidated['Metric'] = df_input_consolidated['Metric'].str.lower()\n", "df_input_consolidated['Entity'] = df_input_consolidated['Entity'].str.lower()\n", "\n", "train_set = int(0.8 * len(df_input_consolidated))\n", "df_train = df_input_consolidated[:train_set][['Response', 'Metric', 'Entity']]\n", "df_val = df_input_consolidated[train_set:][['Response', 'Metric', 'Entity']]\n", "\n", "# Function to tokenize sentences and assign NER tags\n", "def tag_entities_and_metrics(sentence, metric, entity):\n", " words = re.findall(r\"\\w+|[.,!?;'-]|\\b's\\b\", sentence)\n", " tags = ['O'] * len(words)\n", "\n", " # Tagging the metric\n", " metric_tokens = metric.split()\n", " metric_len = len(metric_tokens)\n", " for i in range(len(words) - metric_len + 1):\n", " if words[i:i + metric_len] == metric_tokens:\n", " tags[i] = 'B-METRIC'\n", " for j in range(1, metric_len):\n", " tags[i + j] = 'I-METRIC'\n", "\n", " # Tagging the entity\n", " entity_tokens = entity.split()\n", " entity_len = len(entity_tokens)\n", " for i in range(len(words) - entity_len + 1):\n", " if words[i:i + entity_len] == entity_tokens:\n", " tags[i] = 'B-ENTITY'\n", " for j in range(1, entity_len):\n", " tags[i + j] = 'I-ENTITY'\n", "\n", " return words, tags\n", "\n", "# Function to process dataframe and prepare datasets\n", "def prepare_dataset(df):\n", " df['tokens_and_tags'] = df.apply(lambda row: tag_entities_and_metrics(row['Response'], row['Metric'], row['Entity']), axis=1)\n", " df['tokens'] = df['tokens_and_tags'].apply(lambda x: x[0])\n", " df['ner_tags'] = df['tokens_and_tags'].apply(lambda x: x[1])\n", " df = df.drop(columns=['tokens_and_tags'])\n", " df = df[df['ner_tags'].apply(lambda tags: not all(tag == 'O' for tag in tags))]\n", " df['ner_tags'] = df['ner_tags'].apply(lambda tags: [label2id[tag] for tag in tags])\n", " return Dataset.from_pandas(df)\n", "\n", "# Prepare datasets\n", "train_dataset = prepare_dataset(df_train)\n", "val_dataset = prepare_dataset(df_val)\n", "\n", "# Print the size of the train and validation datasets\n", "print(f\"Size of train dataset: {len(train_dataset)}\")\n", "print(f\"Size of validation dataset: {len(val_dataset)}\")\n", "\n", "# Count the occurrences of each label in the training and validation 
sets\n", "def count_labels(dataset):\n", " label_counts = Counter()\n", " for example in dataset:\n", " labels = example['ner_tags']\n", " for label in labels:\n", " label_counts[id2label[label]] += 1\n", " return label_counts\n", "\n", "train_label_counts = count_labels(train_dataset)\n", "val_label_counts = count_labels(val_dataset)\n", "\n", "print(\"Training Label Counts:\")\n", "print(train_label_counts)\n", "\n", "print(\"\\nValidation Label Counts:\")\n", "print(val_label_counts)\n", "\n", "# Tokenization and Alignment Functions\n", "checkpoint = 'Jean-Baptiste/roberta-large-ner-english'\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "def align_labels(labels, word_ids):\n", " aligned_labels = []\n", " prev_word_id = None\n", " for word_id in word_ids:\n", " if word_id is None:\n", " aligned_labels.append(-100)\n", " elif word_id != prev_word_id:\n", " aligned_labels.append(labels[word_id])\n", " else:\n", " aligned_labels.append(labels[word_id] if labels[word_id] != 0 else -100)\n", " prev_word_id = word_id\n", " return aligned_labels\n", "\n", "def tokenize_and_align_labels(examples):\n", " tokenized_inputs = tokenizer(examples['tokens'], truncation=True, max_length=32, is_split_into_words=True)\n", " all_labels = examples['ner_tags']\n", " new_labels = [align_labels(labels, tokenized_inputs.word_ids(i)) for i, labels in enumerate(all_labels)]\n", " tokenized_inputs['labels'] = new_labels\n", " return tokenized_inputs\n", "\n", "tokenized_train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n", "tokenized_val_dataset = val_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n", "\n", "# Define the data collator\n", "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)\n", "\n", "# Load metric\n", "metric = load_metric('seqeval',trust_remote_code=True)\n", "\n", "# Custom loss function to give higher weight to metrics\n", "class WeightedLoss(nn.CrossEntropyLoss):\n", " def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'):\n", " super(WeightedLoss, self).__init__(weight, size_average, ignore_index, reduce, reduction)\n", "\n", " def forward(self, input, target):\n", " if self.weight is not None:\n", " assert self.weight.dim() == 1\n", " input = input * self.weight.unsqueeze(0).expand_as(input)\n", " return super(WeightedLoss, self).forward(input, target)\n", "\n", "# Custom Trainer to use the weighted loss\n", "class CustomTrainer(Trainer):\n", " def compute_loss(self, model, inputs, return_outputs=False):\n", " labels = inputs.get(\"labels\")\n", " outputs = model(**inputs)\n", " logits = outputs.get(\"logits\")\n", " # Define the weights, giving higher weight to METRIC labels\n", " class_weights = torch.tensor([0.4, 0.5, 0.5, 1.0, 1.0], device=logits.device)\n", " loss_fct = WeightedLoss(weight=class_weights)\n", " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", " return (loss, outputs) if return_outputs else loss\n", "\n", "# Function to compute metrics\n", "def compute_metrics(p):\n", " predictions, labels = p\n", " predictions = np.argmax(predictions, axis=2)\n", " true_labels = [[id2label[label] for label in label_set if label != -100] for label_set in labels]\n", " true_predictions = [[id2label[pred] for pred, label in zip(pred_set, label_set) if label != -100] for pred_set, label_set in zip(predictions, labels)]\n", " results = 
metric.compute(predictions=true_predictions, references=true_labels, zero_division=0)\n", "\n", " # Calculate metrics for ENTITY\n", " entity_results = metric.compute(\n", " predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for pred_set, label_set in zip(true_predictions, true_labels)],\n", " references=[[true for true in label_set if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for label_set in true_labels],\n", " zero_division=0\n", " )\n", "\n", " # Calculate metrics for METRIC\n", " metric_results = metric.compute(\n", " predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for pred_set, label_set in zip(true_predictions, true_labels)],\n", " references=[[true for true in label_set if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for label_set in true_labels],\n", " zero_division=0\n", " )\n", "\n", " return {\n", " \"overall_precision\": results[\"overall_precision\"],\n", " \"overall_recall\": results[\"overall_recall\"],\n", " \"overall_f1\": results[\"overall_f1\"],\n", " \"overall_accuracy\": results[\"overall_accuracy\"],\n", " \"entity_precision\": entity_results[\"overall_precision\"],\n", " \"entity_recall\": entity_results[\"overall_recall\"],\n", " \"entity_f1\": entity_results[\"overall_f1\"],\n", " \"entity_accuracy\": entity_results[\"overall_accuracy\"],\n", " \"metric_precision\": metric_results[\"overall_precision\"],\n", " \"metric_recall\": metric_results[\"overall_recall\"],\n", " \"metric_f1\": metric_results[\"overall_f1\"],\n", " \"metric_accuracy\": metric_results[\"overall_accuracy\"],\n", " }\n", "\n", "# Load the model\n", "model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels=len(labels), id2label=id2label, label2id=label2id,ignore_mismatched_sizes=True)\n", "\n", "# Custom callback to capture loss history\n", "class LossHistoryCallback(TrainerCallback):\n", " def __init__(self):\n", " self.losses = []\n", " self.eval_losses = []\n", "\n", " def on_log(self, args, state, control, logs=None, **kwargs):\n", " if logs is not None:\n", " if 'loss' in logs:\n", " self.losses.append(logs['loss'])\n", " if 'eval_loss' in logs:\n", " self.eval_losses.append(logs['eval_loss'])\n", "\n", "loss_history_callback = LossHistoryCallback()\n", "\n", "# Training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " evaluation_strategy=IntervalStrategy.STEPS,\n", " save_strategy=\"steps\",\n", " logging_strategy=\"steps\",\n", " learning_rate=1e-5,\n", " num_train_epochs=10,\n", " per_device_train_batch_size=32,\n", " per_device_eval_batch_size=32, \n", " gradient_accumulation_steps=4, # Accumulate gradients over 4 steps\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " eval_steps=10,\n", " save_steps=10,\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", ")\n", "\n", "# Custom Trainer instance\n", "trainer = CustomTrainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=tokenized_train_dataset,\n", " eval_dataset=tokenized_val_dataset,\n", " tokenizer=tokenizer,\n", " data_collator=data_collator,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=2), loss_history_callback]\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model\n", 
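"# (Optional, illustrative sketch: before saving, plot the training/validation loss curves\n", "#  collected by loss_history_callback above; matplotlib is already imported as plt, and\n", "#  logging and evaluation both run every 10 steps, so the two histories share a step axis.)\n", "if loss_history_callback.losses:\n", "    steps = [(i + 1) * training_args.logging_steps for i in range(len(loss_history_callback.losses))]\n", "    plt.plot(steps, loss_history_callback.losses, label='training loss')\n", "    plt.plot(steps[:len(loss_history_callback.eval_losses)], loss_history_callback.eval_losses, label='validation loss')\n", "    plt.xlabel('step')\n", "    plt.ylabel('loss')\n", "    plt.legend()\n", "    plt.show()\n", "\n",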
"trainer.save_model('/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model')\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Inference" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Entities and metrics and their average scores for text: 'What is the EPS for Apple from 2023, compare it against Intel?'\n", "Predicted Entities: {'apple': [0.9999647]}\n", "Predicted Metrics: {'eps': [0.99446386]}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What are the REVENUES of Amazon?'\n", "Predicted Entities: {'amazon': [0.9999809]}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the annual income for Microsoft vs NVDA?'\n", "Predicted Entities: {'microsoft': [0.9999985]}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the depreciation for ShaktiPumps in 2023?'\n", "Predicted Entities: {}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the ttm net profits for Amazon in 2023?'\n", "Predicted Entities: {'amazon': [0.9999693]}\n", "Predicted Metrics: {'tm': [0.5082908]}\n", "\n", "==================================================\n", "\n" ] } ], "source": [ "import time\n", "import numpy as np\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n", "\n", "checkpoint = \"ner_model\"\n", "tokenizer_ner = AutoTokenizer.from_pretrained(checkpoint)\n", "model_ner = AutoModelForTokenClassification.from_pretrained(checkpoint)\n", "\n", "# Initialize the NER pipeline without additional parameters\n", "ner_model = pipeline('ner', model=model_ner, tokenizer=tokenizer_ner, aggregation_strategy='simple')\n", "\n", "# Enhanced post-processing\n", "def extract_entities_with_scores(example_text):\n", " # Tokenize the text with offset mappings\n", " encoding = tokenizer_ner(example_text, return_offsets_mapping=True, truncation=True, max_length=32)\n", " offset_mapping = encoding['offset_mapping']\n", " tokens = tokenizer_ner.convert_ids_to_tokens(encoding['input_ids'])\n", " \n", " # Use the pipeline directly\n", " ner_results = ner_model(example_text)\n", " # print(ner_results)\n", "\n", " entities = {}\n", " metrics = {}\n", " current_entity = None\n", "\n", " for result in ner_results:\n", " entity_type = result['entity_group']\n", " start, end = result['start'], result['end']\n", " score = result['score']\n", " word = example_text[start:end].strip()\n", " # print(word)\n", "\n", " # Check if current entity needs to be continued or started fresh\n", " if current_entity and current_entity['entity_group'] == entity_type and (current_entity['end'] == start or current_entity['end'] + 1 == start):\n", " # Continue the current entity\n", " current_entity['word'] += ' ' + word if current_entity['end'] + 1 == start else word\n", " current_entity['scores'].append(score)\n", " current_entity['end'] = end\n", " else:\n", " # Save the previous 
entity\n", " if current_entity:\n", " entity_name = current_entity['word']\n", " average_score = np.mean(current_entity['scores'])\n", " if current_entity['entity_group'] == 'ENTITY':\n", " if entity_name not in entities:\n", " entities[entity_name] = []\n", " entities[entity_name].append(average_score)\n", " elif current_entity['entity_group'] == 'METRIC':\n", " if entity_name not in metrics:\n", " metrics[entity_name] = []\n", " metrics[entity_name].append(average_score)\n", "\n", " # Start a new entity\n", " current_entity = {'entity_group': entity_type, 'word': word, 'scores': [score], 'start': start, 'end': end}\n", "\n", " # Append the last entity or metric if exists\n", " if current_entity:\n", " entity_name = current_entity['word']\n", " average_score = np.mean(current_entity['scores'])\n", " if current_entity['entity_group'] == 'ENTITY':\n", " if entity_name not in entities:\n", " entities[entity_name] = []\n", " entities[entity_name].append(average_score)\n", " elif current_entity['entity_group'] == 'METRIC':\n", " if entity_name not in metrics:\n", " metrics[entity_name] = []\n", " metrics[entity_name].append(average_score)\n", "\n", " return entities, metrics\n", "\n", "# Test the model with example texts\n", "example_texts = [\n", " \"What is the EPS for Apple from 2023, compare it against Intel?\",\n", " \"What are the REVENUES of Amazon?\",\n", " \"What is the annual income for Microsoft vs NVDA?\",\n", " \"What is the depreciation for ShaktiPumps in 2023?\",\n", " \"What is the ttm net profits for Amazon in 2023?\"\n", "]\n", "\n", "## Test set \n", "# example_texts = df_val.sample(n=5)['Response'].values.tolist()\n", "\n", "\n", "for text in example_texts:\n", " entities, metrics = extract_entities_with_scores(text.lower())\n", "\n", " print(f\"Entities and metrics and their average scores for text: '{text}'\")\n", " print(f\"Predicted Entities: {entities}\")\n", " print(f\"Predicted Metrics: {metrics}\")\n", " print(\"\\n\" + \"=\"*50 + \"\\n\")\n" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Metric\n", "pe ratio ttm 1156\n", "return on assets ttm 635\n", "dividend yield 615\n", "piotroski score 612\n", "return on capital employed 611\n", "price to fair-value ttm 610\n", "net-profit margin ttm 605\n", "debt-equity ratio ttm 603\n", "debt ratio ttm 598\n", "operating profit margin ttm 597\n", "total-debt growth 595\n", "retained earnings 594\n", "altman z score 591\n", "pe ratio 589\n", "ebit 589\n", "dividend yield ttm 589\n", "return on capital employed ttm 588\n", "netdebt growth 584\n", "growth in net-income-ratio 582\n", "dividend yield percentage ttm 581\n", "gross profit margin ttm 578\n", "interest coverage ttm 578\n", "peg ratio ttm 577\n", "pe to growth ratio 576\n", "dividend payout ratio 575\n", "ebitda growth 574\n", "pef ratio ttm 572\n", "working capital 571\n", "debt-equity ratio 571\n", "revenue growth 571\n", "eps growth 571\n", "enterprise value multiple 566\n", "current ratio ttm 562\n", "total liabilities 562\n", "dividend per share ttm 560\n", "interest coverage 558\n", "net-income growth 557\n", "price to sales ratio ttm 555\n", "quick ratio ttm 551\n", "return on equity ttm 550\n", "debt ratio 546\n", "total assets 539\n", "Name: count, dtype: int64" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_input_consolidated['Metric'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Send to HF" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "Router" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/\"\n", "repo_id = \"Aditiyadav/Router\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/\"\n", "repo_id = \"Aditiyadav/SubRouter\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model/\"\n", "repo_id = \"Aditiyadav/NER\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] } ], "metadata": { "kernelspec": { "display_name": "gpu_jan12", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }