{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: fineGrained).\n", "Your token has been saved to /home/ady/.cache/huggingface/token\n", "Login successful\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-15 15:22:07,933\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 09-15 15:22:08 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=26000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Instruct, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n", "INFO 09-15 15:22:09 model_runner.py:997] Starting to load model meta-llama/Meta-Llama-3.1-8B-Instruct...\n", "INFO 09-15 15:22:09 weight_utils.py:242] Using model weights format ['*.safetensors']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "# !pwd" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Router Data Creation " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
usernametextcategoryvalues
04b0d73e8-6ac2-4214-8c68-7d1fc36dd12dWhat’s is the P/E of CVS compared to competitorsCompanyAnalysis1.0
20f52d06c-09d2-4c99-8faa-9ec98808ae02How does Cigar-Butt Investing differ from othe...LLM1.0
3ed2ece58-3ce3-4cb9-a013-2e569bb14fcftell me a stock which will increase by 5 % in ...OutOfScope1.0
65ce399be-648c-4768-8e25-a73ca480d4e2Is MTCH revenue growing?CompanyAnalysis1.0
11f760f937-be70-4ab6-9ad8-3b69edae7a76What is Momentum investing?LLM1.0
...............
104218776314-d3b4-4fd4-a328-991b448c8a3cWhat is the intrinsic value of Brookfield corp...CompanyAnalysis1.0
104317e24d74-ce34-4c80-a4b0-00f91021e28aBuild me a stock portfolio before general elec...OutOfScope1.0
1064044fdc80-40d2-4322-8c5a-749bdb1c651dWhich sectors are most affected by economic cy...LLM1.0
10723c484112-d34d-459f-8e03-33d488088e4bwhat do you know about the sentiment of intel ...CompanyAnalysis1.0
1091f4fccd78-2c2a-4d97-9df6-d5504d87ee7eHiOutOfScope1.0
\n", "

200 rows × 4 columns

\n", "
" ], "text/plain": [ " username \\\n", "0 4b0d73e8-6ac2-4214-8c68-7d1fc36dd12d \n", "2 0f52d06c-09d2-4c99-8faa-9ec98808ae02 \n", "3 ed2ece58-3ce3-4cb9-a013-2e569bb14fcf \n", "6 5ce399be-648c-4768-8e25-a73ca480d4e2 \n", "11 f760f937-be70-4ab6-9ad8-3b69edae7a76 \n", "... ... \n", "1042 18776314-d3b4-4fd4-a328-991b448c8a3c \n", "1043 17e24d74-ce34-4c80-a4b0-00f91021e28a \n", "1064 044fdc80-40d2-4322-8c5a-749bdb1c651d \n", "1072 3c484112-d34d-459f-8e03-33d488088e4b \n", "1091 f4fccd78-2c2a-4d97-9df6-d5504d87ee7e \n", "\n", " text category \\\n", "0 What’s is the P/E of CVS compared to competitors CompanyAnalysis \n", "2 How does Cigar-Butt Investing differ from othe... LLM \n", "3 tell me a stock which will increase by 5 % in ... OutOfScope \n", "6 Is MTCH revenue growing? CompanyAnalysis \n", "11 What is Momentum investing? LLM \n", "... ... ... \n", "1042 What is the intrinsic value of Brookfield corp... CompanyAnalysis \n", "1043 Build me a stock portfolio before general elec... OutOfScope \n", "1064 Which sectors are most affected by economic cy... LLM \n", "1072 what do you know about the sentiment of intel ... CompanyAnalysis \n", "1091 Hi OutOfScope \n", "\n", " values \n", "0 1.0 \n", "2 1.0 \n", "3 1.0 \n", "6 1.0 \n", "11 1.0 \n", "... ... \n", "1042 1.0 \n", "1043 1.0 \n", "1064 1.0 \n", "1072 1.0 \n", "1091 1.0 \n", "\n", "[200 rows x 4 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "NUM_EXAMPLES=200\n", "\n", "router_tagged_df=pd.read_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Router_Tagging.csv')[1:].drop(['Others','NotesForOthers'],axis=1).fillna(0)\n", "router_tagged_df=router_tagged_df[router_tagged_df['AdyTagged']==1]\n", "router_tagged_df=router_tagged_df.drop(['AdyTagged'],axis=1).drop_duplicates()\n", "\n", "router_tagged_df = router_tagged_df.melt(id_vars=['username', 'question'], var_name='category', value_name='values')\n", "router_tagged_df=router_tagged_df.sample(frac=1).reset_index(drop=True)\n", "router_tagged_df=router_tagged_df.rename(columns={'question':'text'})\n", "router_tagged_df=router_tagged_df[router_tagged_df['values']==1]\n", "\n", "\n", "router_tagged_df_sample=router_tagged_df[0:NUM_EXAMPLES]\n", "router_tagged_df_sample\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 0%| | 0/600 [00:00._inner.._wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 112\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1546\u001b[0m, in \u001b[0;36mModelRunner.execute_model\u001b[0;34m(self, model_input, kv_caches, intermediate_tensors, num_steps)\u001b[0m\n\u001b[1;32m 1544\u001b[0m model_forward_start\u001b[38;5;241m.\u001b[39mrecord()\n\u001b[0;32m-> 1546\u001b[0m hidden_or_intermediate_states \u001b[38;5;241m=\u001b[39m 
\u001b[43mmodel_executable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1547\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1548\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_positions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1549\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1550\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1551\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1552\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mMultiModalInputs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmulti_modal_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1553\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1554\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mseqlen_agnostic_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1556\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_forward_time):\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:377\u001b[0m, in \u001b[0;36mMixtralForCausalLM.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 371\u001b[0m input_ids: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 375\u001b[0m intermediate_tensors: Optional[IntermediateTensors] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 376\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 377\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 378\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:297\u001b[0m, in \u001b[0;36mMixtralModel.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 296\u001b[0m layer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlayers[i]\n\u001b[0;32m--> 297\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 298\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_layer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 299\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresidual\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks 
\u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:244\u001b[0m, in \u001b[0;36mMixtralDecoderLayer.forward\u001b[0;34m(self, positions, hidden_states, kv_cache, attn_metadata, residual)\u001b[0m\n\u001b[1;32m 242\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(\n\u001b[1;32m 243\u001b[0m hidden_states, residual)\n\u001b[0;32m--> 244\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mblock_sparse_moe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 245\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states, residual\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File 
\u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/mixtral.py:101\u001b[0m, in \u001b[0;36mMixtralMoE.forward\u001b[0;34m(self, hidden_states)\u001b[0m\n\u001b[1;32m 100\u001b[0m router_logits, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgate(hidden_states)\n\u001b[0;32m--> 101\u001b[0m final_hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexperts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m final_hidden_states\u001b[38;5;241m.\u001b[39mview(orig_shape)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:469\u001b[0m, in \u001b[0;36mFusedMoE.forward\u001b[0;34m(self, hidden_states, router_logits)\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# Matrix multiply.\u001b[39;00m\n\u001b[0;32m--> 469\u001b[0m final_hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_method\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 470\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 471\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 472\u001b[0m \u001b[43m \u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouter_logits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 473\u001b[0m \u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtop_k\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrenormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 475\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_grouped_topk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43muse_grouped_topk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 476\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtopk_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 477\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_expert_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnum_expert_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_routing_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcustom_routing_function\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreduce_results \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtp_size \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:78\u001b[0m, in \u001b[0;36mUnquantizedFusedMoEMethod.apply\u001b[0;34m(self, layer, x, router_logits, top_k, renormalize, use_grouped_topk, topk_group, num_expert_group, custom_routing_function)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\n\u001b[1;32m 66\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 67\u001b[0m layer: torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mModule,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 75\u001b[0m custom_routing_function: Optional[Callable] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 76\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m---> 78\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mforward\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 80\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrouter_logits\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouter_logits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 81\u001b[0m \u001b[43m \u001b[49m\u001b[43mtop_k\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtop_k\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 82\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenormalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrenormalize\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 83\u001b[0m \u001b[43m \u001b[49m\u001b[43muse_grouped_topk\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_grouped_topk\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 84\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 85\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_expert_group\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_expert_group\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 86\u001b[0m \u001b[43m \u001b[49m\u001b[43mcustom_routing_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcustom_routing_function\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/custom_op.py:14\u001b[0m, in \u001b[0;36mCustomOp.forward\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m---> 14\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_forward_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/layer.py:114\u001b[0m, in \u001b[0;36mUnquantizedFusedMoEMethod.forward_cuda\u001b[0;34m(self, layer, x, use_grouped_topk, top_k, router_logits, renormalize, topk_group, num_expert_group, custom_routing_function)\u001b[0m\n\u001b[1;32m 104\u001b[0m topk_weights, topk_ids \u001b[38;5;241m=\u001b[39m FusedMoE\u001b[38;5;241m.\u001b[39mselect_experts(\n\u001b[1;32m 105\u001b[0m hidden_states\u001b[38;5;241m=\u001b[39mx,\n\u001b[1;32m 106\u001b[0m router_logits\u001b[38;5;241m=\u001b[39mrouter_logits,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 111\u001b[0m num_expert_group\u001b[38;5;241m=\u001b[39mnum_expert_group,\n\u001b[1;32m 112\u001b[0m custom_routing_function\u001b[38;5;241m=\u001b[39mcustom_routing_function)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfused_experts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 115\u001b[0m \u001b[43m \u001b[49m\u001b[43mw1\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mw13_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mw2\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mw2_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mtopk_weights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_weights\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mtopk_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtopk_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/fused_moe/fused_moe.py:509\u001b[0m, in \u001b[0;36mfused_experts\u001b[0;34m(hidden_states, w1, w2, topk_weights, topk_ids, inplace, override_config, use_fp8_w8a8, use_int8_w8a16, w1_scale, w2_scale, a1_scale, a2_scale)\u001b[0m\n\u001b[1;32m 507\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config_func(M)\n\u001b[0;32m--> 509\u001b[0m intermediate_cache1 \u001b[38;5;241m=\u001b[39m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mempty\u001b[49m\u001b[43m(\u001b[49m\u001b[43m(\u001b[49m\u001b[43mM\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtopk_ids\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshape\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mN\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 510\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 511\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhidden_states\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 512\u001b[0m intermediate_cache2 \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mempty((M \u001b[38;5;241m*\u001b[39m topk_ids\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m], N \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m \u001b[38;5;241m2\u001b[39m),\n\u001b[1;32m 513\u001b[0m device\u001b[38;5;241m=\u001b[39mhidden_states\u001b[38;5;241m.\u001b[39mdevice,\n\u001b[1;32m 514\u001b[0m dtype\u001b[38;5;241m=\u001b[39mhidden_states\u001b[38;5;241m.\u001b[39mdtype)\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 546.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 229.62 MiB is free. Process 2320625 has 260.00 MiB memory in use. Including non-PyTorch memory, this process has 46.89 GiB memory in use. Of the allocated memory 46.10 GiB is allocated by PyTorch, with 32.07 MiB allocated in private pools (e.g., CUDA Graphs), and 81.19 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 26\u001b[0m\n\u001b[1;32m 23\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams(temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.3\u001b[39m, max_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1200\u001b[39m)\n\u001b[1;32m 25\u001b[0m \u001b[38;5;66;03m# Generate outputs using the model\u001b[39;00m\n\u001b[0;32m---> 26\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mvllm_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mall_prompts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m \u001b[38;5;66;03m# Process the generated outputs and store them in a dictionary\u001b[39;00m\n\u001b[1;32m 29\u001b[0m synthetic_questions_dict \u001b[38;5;241m=\u001b[39m {}\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/utils.py:1036\u001b[0m, in \u001b[0;36mdeprecate_kwargs..wrapper..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1029\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1031\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1032\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1033\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1034\u001b[0m )\n\u001b[0;32m-> 1036\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:348\u001b[0m, in \u001b[0;36mLLM.generate\u001b[0;34m(self, prompts, sampling_params, prompt_token_ids, use_tqdm, lora_request, prompt_adapter_request, guided_options_request)\u001b[0m\n\u001b[1;32m 339\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m SamplingParams()\n\u001b[1;32m 341\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_and_add_requests(\n\u001b[1;32m 342\u001b[0m inputs\u001b[38;5;241m=\u001b[39minputs,\n\u001b[1;32m 343\u001b[0m params\u001b[38;5;241m=\u001b[39msampling_params,\n\u001b[1;32m 344\u001b[0m lora_request\u001b[38;5;241m=\u001b[39mlora_request,\n\u001b[1;32m 345\u001b[0m prompt_adapter_request\u001b[38;5;241m=\u001b[39mprompt_adapter_request,\n\u001b[1;32m 346\u001b[0m guided_options\u001b[38;5;241m=\u001b[39mguided_options_request)\n\u001b[0;32m--> 348\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_tqdm\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
LLMEngine\u001b[38;5;241m.\u001b[39mvalidate_outputs(outputs, RequestOutput)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:715\u001b[0m, in \u001b[0;36mLLM._run_engine\u001b[0;34m(self, use_tqdm)\u001b[0m\n\u001b[1;32m 713\u001b[0m total_out_toks \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 714\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine\u001b[38;5;241m.\u001b[39mhas_unfinished_requests():\n\u001b[0;32m--> 715\u001b[0m step_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 716\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m step_outputs:\n\u001b[1;32m 717\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output\u001b[38;5;241m.\u001b[39mfinished:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:1223\u001b[0m, in \u001b[0;36mLLMEngine.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1219\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m allow_async_output_proc:\n\u001b[1;32m 1220\u001b[0m execute_model_req\u001b[38;5;241m.\u001b[39masync_callback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39masync_callbacks[\n\u001b[1;32m 1221\u001b[0m virtual_engine]\n\u001b[0;32m-> 1223\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1224\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1226\u001b[0m \u001b[38;5;66;03m# We need to do this here so that last step's sampled_token_ids can\u001b[39;00m\n\u001b[1;32m 1227\u001b[0m \u001b[38;5;66;03m# be passed to the next iteration for PP.\u001b[39;00m\n\u001b[1;32m 1228\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscheduler_config\u001b[38;5;241m.\u001b[39mis_multi_step:\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/distributed_gpu_executor.py:78\u001b[0m, in \u001b[0;36mDistributedGPUExecutor.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mparallel_worker_tasks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_run_workers(\n\u001b[1;32m 73\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstart_worker_execution_loop\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 74\u001b[0m async_run_tensor_parallel_workers_only\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 75\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mextra_execute_model_run_workers_kwargs)\n\u001b[1;32m 77\u001b[0m \u001b[38;5;66;03m# Only the driver worker returns the sampling results.\u001b[39;00m\n\u001b[0;32m---> 78\u001b[0m driver_outputs \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_driver_execute_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 79\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m driver_outputs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 80\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m driver_outputs\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/multiproc_gpu_executor.py:162\u001b[0m, in \u001b[0;36mMultiprocessingGPUExecutor._driver_execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_driver_execute_model\u001b[39m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28mself\u001b[39m, execute_model_req: Optional[ExecuteModelRequest]\n\u001b[1;32m 156\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Optional[List[SamplerOutput]]:\n\u001b[1;32m 157\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Run execute_model in the driver worker.\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \n\u001b[1;32m 159\u001b[0m \u001b[38;5;124;03m Passing None will cause the driver to stop the model execution\u001b[39;00m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;124;03m loop running in each of the remote workers.\u001b[39;00m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 162\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker_base.py:327\u001b[0m, in \u001b[0;36mLocalOrDistributedWorkerBase.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_execute_time):\n\u001b[1;32m 324\u001b[0m orig_model_execute_time \u001b[38;5;241m=\u001b[39m intermediate_tensors\u001b[38;5;241m.\u001b[39mtensors\u001b[38;5;241m.\u001b[39mget(\n\u001b[1;32m 325\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_execute_time\u001b[39m\u001b[38;5;124m\"\u001b[39m, torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[38;5;241m0\u001b[39m))\u001b[38;5;241m.\u001b[39mitem()\n\u001b[0;32m--> 327\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 329\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m[\u001b[49m\u001b[43mworker_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvirtual_engine\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 330\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 331\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 336\u001b[0m model_execute_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mperf_counter() \u001b[38;5;241m-\u001b[39m start_time\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;66;03m# output is IntermediateTensors\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner_base.py:125\u001b[0m, in \u001b[0;36mdump_input_when_exception.._inner.._wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m i \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (exclude_args \u001b[38;5;129;01mor\u001b[39;00m []):\n\u001b[1;32m 124\u001b[0m dumped_inputs[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marg_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mi\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m arg\n\u001b[0;32m--> 125\u001b[0m 
\u001b[43mpickle\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdump\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdumped_inputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfilep\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(err)(\n\u001b[1;32m 127\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError in model execution (input dumped to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfilename\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m): \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(err)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:563\u001b[0m, in \u001b[0;36mLLMEngine.__reduce__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 560\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__reduce__\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 561\u001b[0m \u001b[38;5;66;03m# This is to ensure that the LLMEngine is not referenced in\u001b[39;00m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;66;03m# the closure used to initialize Ray worker actors\u001b[39;00m\n\u001b[0;32m--> 563\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLLMEngine should not be pickled!\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", "\u001b[0;31mRuntimeError\u001b[0m: LLMEngine should not be pickled!" ] } ], "source": [ "import pandas as pd\n", "import os\n", "\n", "pd.set_option('max_colwidth', 1400)\n", "\n", "# Select a subset of the data\n", "all_questions = router_tagged_df_sample['text'].values.tolist()\n", "\n", "synthetic_questions_dict = {}\n", "all_prompts = []\n", "original_prompts = [] # To keep track of the original prompts\n", "\n", "for prompt in all_questions:\n", " Instruction = f\"\"\"[INST] Imagine you are an expert & award winning stock market analyst who specializes in asking the right questions relevant to a thorough stock research. Given a user query, generate 20 further precise, expert and relevant questions with focus on identifying similar companies related to the given query. I want exactly 20 questions. Your questions should be all-encompassing and can be answered independently. Don't use words like 'GIVEN QUERY' or 'GIVEN INDUSTRY'. If an industry or sector is provided, try to respond with diverse questions with similar sectors pertinent to financial markets. If a ratio like MarketCap or ROI is asked in the question, your response should have a slightly different ratio based on your stock market understanding. The questions should be brief and less than 20 words. If there is a company name mentioned in the question, please use the same company name (with paraphrasing as needed) in related questions. 
QUESTION: {prompt} Start with similar questions directly without any note or disclaimer.[/INST]\"\"\"\n", " all_prompts.append(Instruction)\n", " original_prompts.append(prompt)\n", "\n", "# Repeat the prompts for multiple iterations\n", "all_prompts = all_prompts * 3\n", "original_prompts = original_prompts * 3\n", "\n", "# Define the sampling parameters\n", "sampling_params = SamplingParams(temperature=0.3, max_tokens=1200)\n", "\n", "# Generate outputs using the model\n", "outputs = vllm_model.generate(all_prompts, sampling_params)\n", "\n", "# Process the generated outputs and store them in a dictionary\n", "synthetic_questions_dict = {}\n", "for output, original_prompt in zip(outputs, original_prompts):\n", " generated_text = output.outputs[0].text.strip() if output.outputs else 'N/A'\n", " similar_questions = generated_text.split('\\n')\n", " if original_prompt in synthetic_questions_dict:\n", " synthetic_questions_dict[original_prompt].extend(similar_questions)\n", " else:\n", " synthetic_questions_dict[original_prompt] = similar_questions\n", "\n", "# Convert the dictionary to a DataFrame\n", "synthetic_questions_df = pd.DataFrame(list(synthetic_questions_dict.items()), columns=['text', 'Questions'])\n", "\n", "# Explode the list of questions into separate rows\n", "synthetic_questions_df = synthetic_questions_df.explode('Questions')\n", "\n", "# Merge with the original DataFrame to include the 'category' column\n", "synthetic_questions_df = synthetic_questions_df.merge(router_tagged_df_sample[['text', 'category']], on='text').sample(frac=1)\n", "\n", "# Remove duplicates\n", "synthetic_questions_df = synthetic_questions_df.drop_duplicates()\n", "\n", "# Ensure the directory exists\n", "output_directory = \"/home/ady/Stockbuzz.ai_Data/RouterNER/\"\n", "os.makedirs(output_directory, exist_ok=True)\n", "\n", "# Save the DataFrame to the specified CSV file location\n", "output_file_path = os.path.join(output_directory, \"synthetic_questions_df.csv\")\n", "synthetic_questions_df.to_csv(output_file_path, index=False)\n", "\n", "# Display the final DataFrame\n", "synthetic_questions_df\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ " Router Model" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. 
Please use a different name to suppress this warning.\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "ename": "OutOfMemoryError", "evalue": "CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 6.38 MiB is free. Process 2320625 has 260.00 MiB memory in use. Process 2325452 has 260.00 MiB memory in use. Process 2328697 has 260.00 MiB memory in use. Process 2334035 has 260.00 MiB memory in use. Process 2398498 has 45.27 GiB memory in use. Including non-PyTorch memory, this process has 1.06 GiB memory in use. Of the allocated memory 810.76 MiB is allocated by PyTorch, and 11.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. 
See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[2], line 99\u001b[0m\n\u001b[1;32m 97\u001b[0m \u001b[38;5;66;03m# Load the model\u001b[39;00m\n\u001b[1;32m 98\u001b[0m model \u001b[38;5;241m=\u001b[39m BertForSequenceClassification\u001b[38;5;241m.\u001b[39mfrom_pretrained(EMBEDDING_MODEL, num_labels\u001b[38;5;241m=\u001b[39mtrain_df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlabel\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mnunique())\n\u001b[0;32m---> 99\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Move the model to the specified device\u001b[39;00m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# Define the compute_metrics function\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_metrics\u001b[39m(p):\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/modeling_utils.py:2883\u001b[0m, in \u001b[0;36mPreTrainedModel.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2878\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype_present_in_args:\n\u001b[1;32m 2879\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 2880\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou cannot cast a GPTQ model in a new `dtype`. Make sure to load the model using `from_pretrained` using the desired\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2881\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `dtype` by passing the correct `torch_dtype` argument.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2882\u001b[0m )\n\u001b[0;32m-> 2883\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1174\u001b[0m, in \u001b[0;36mModule.to\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1171\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1172\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m-> 1174\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconvert\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m 
\u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", " \u001b[0;31m[... 
skipping similar frames: Module._apply at line 780 (3 times)]\u001b[0m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:780\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 779\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 780\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 790\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 791\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:805\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 801\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 802\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 803\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 805\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparam\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 806\u001b[0m p_should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 808\u001b[0m \u001b[38;5;66;03m# subclasses may have multiple child tensors so we need to use swap_tensors\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1160\u001b[0m, in \u001b[0;36mModule.to..convert\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 1153\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m convert_to_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m t\u001b[38;5;241m.\u001b[39mdim() \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;241m4\u001b[39m, \u001b[38;5;241m5\u001b[39m):\n\u001b[1;32m 1154\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m t\u001b[38;5;241m.\u001b[39mto(\n\u001b[1;32m 1155\u001b[0m device,\n\u001b[1;32m 1156\u001b[0m dtype \u001b[38;5;28;01mif\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_floating_point() \u001b[38;5;129;01mor\u001b[39;00m t\u001b[38;5;241m.\u001b[39mis_complex() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1157\u001b[0m non_blocking,\n\u001b[1;32m 1158\u001b[0m 
memory_format\u001b[38;5;241m=\u001b[39mconvert_to_format,\n\u001b[1;32m 1159\u001b[0m )\n\u001b[0;32m-> 1160\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1162\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_floating_point\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_complex\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 1163\u001b[0m \u001b[43m \u001b[49m\u001b[43mnon_blocking\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1164\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1165\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(e) \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot copy out of meta tensor; no data!\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 16.00 MiB. GPU 0 has a total capacity of 47.42 GiB of which 6.38 MiB is free. Process 2320625 has 260.00 MiB memory in use. Process 2325452 has 260.00 MiB memory in use. Process 2328697 has 260.00 MiB memory in use. Process 2334035 has 260.00 MiB memory in use. Process 2398498 has 45.27 GiB memory in use. Including non-PyTorch memory, this process has 1.06 GiB memory in use. Of the allocated memory 810.76 MiB is allocated by PyTorch, and 11.24 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." 
] } ], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "import numpy as np\n", "import pandas as pd\n", "import torch\n", "from torch.utils.data import Dataset, DataLoader\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import precision_recall_fscore_support, accuracy_score\n", "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback\n", "import gc\n", "import random\n", "import json\n", "import os\n", "\n", "# Set device to GPU 0\n", "device = torch.device(\"cuda:0\")\n", "\n", "# Clear GPU memory\n", "torch.cuda.empty_cache()\n", "gc.collect()\n", "\n", "# Load the training DataFrame\n", "synthetic_questions_df = pd.read_csv(\"/home/ady/Stockbuzz.ai_Data/RouterNER/synthetic_questions_df.csv\")\n", "\n", "# Updating Training Data in same format\n", "question_dict = synthetic_questions_df.groupby('text')['Questions'].apply(list).to_dict()\n", "\n", "# Function to create 'Question_Concatenate'\n", "def create_question_concatenate(row):\n", " primary_key = row['text']\n", " possible_values = question_dict[primary_key]\n", " if len(possible_values) < 2:\n", " selected_items = possible_values\n", " else:\n", " selected_items = random.sample(possible_values, 2)\n", " return f\"{row['text']},{row['Questions']},{selected_items[0]},{selected_items[1]}\"\n", "\n", "# Apply the function to create the new column\n", "synthetic_questions_df['Questions'] = synthetic_questions_df.apply(create_question_concatenate, axis=1)\n", "synthetic_questions_df['label'] = synthetic_questions_df['category'].astype('category').cat.codes\n", "\n", "# synthetic_questions_df=synthetic_questions_df.sample(n=min(100,len(synthetic_questions_df)))\n", "\n", "# Create the category-to-label mapping\n", "label_to_category = dict(enumerate(synthetic_questions_df['category'].astype('category').cat.categories))\n", "category_to_label = {v: k for k, v in label_to_category.items()}\n", "\n", "# Save the mapping to a JSON file in the model's directory\n", "os.makedirs('Router_Models', exist_ok=True)\n", "with open('Router_Models/label_to_category.json', 'w') as f:\n", " json.dump(label_to_category, f)\n", "with open('Router_Models/category_to_label.json', 'w') as f:\n", " json.dump(category_to_label, f)\n", "\n", "# Split data based on unique 'text'\n", "unique_sql_all = synthetic_questions_df['text'].unique().tolist()\n", "train_size = int(0.8 * len(unique_sql_all))\n", "val_size = int(0.2 * len(unique_sql_all))\n", "\n", "train_sqls = unique_sql_all[:train_size]\n", "val_sqls = unique_sql_all[train_size:train_size + val_size]\n", "\n", "train_df = synthetic_questions_df[synthetic_questions_df['text'].isin(train_sqls)]\n", "val_df = synthetic_questions_df[synthetic_questions_df['text'].isin(val_sqls)]\n", "\n", "# Initialize the tokenizer\n", "EMBEDDING_MODEL = 'google-bert/bert-large-uncased'\n", "tokenizer = BertTokenizer.from_pretrained(EMBEDDING_MODEL)\n", "\n", "# Tokenize the datasets\n", "def tokenize_data(data):\n", " return tokenizer(data['Questions'].astype(str).tolist(), padding=True, truncation=True, max_length=128, return_tensors='pt')\n", "\n", "train_tokens = tokenize_data(train_df)\n", "val_tokens = tokenize_data(val_df)\n", "\n", "train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)\n", "val_labels = torch.tensor(val_df['label'].values, dtype=torch.long)\n", "\n", "# Create custom Dataset class\n", "class ArticleDataset(Dataset):\n", " def __init__(self, encodings, 
labels):\n", " self.encodings = encodings\n", " self.labels = labels\n", "\n", " def __getitem__(self, idx):\n", " item = {key: val[idx] for key, val in self.encodings.items()}\n", " item['labels'] = self.labels[idx]\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)\n", "\n", "train_dataset = ArticleDataset(train_tokens, train_labels)\n", "val_dataset = ArticleDataset(val_tokens, val_labels)\n", "\n", "# Load the model\n", "model = BertForSequenceClassification.from_pretrained(EMBEDDING_MODEL, num_labels=train_df['label'].nunique())\n", "model.to(device) # Move the model to the specified device\n", "\n", "# Define the compute_metrics function\n", "def compute_metrics(p):\n", " preds = np.argmax(p.predictions, axis=1)\n", " labels = p.label_ids\n", " precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n", " acc = accuracy_score(labels, preds)\n", " return {\n", " 'accuracy': acc,\n", " 'precision': precision,\n", " 'recall': recall,\n", " 'f1': f1,\n", " }\n", "\n", "# Define the training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " num_train_epochs=10,\n", " per_device_train_batch_size=64,\n", " per_device_eval_batch_size=64,\n", " gradient_accumulation_steps=4,\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", " no_cuda=False,\n", " dataloader_pin_memory=False,\n", ")\n", "\n", "# Define the trainer\n", "trainer = Trainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model to the specified directory\n", "model.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/')\n", "tokenizer.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Router Inference" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Top 3 companies by market cap?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 105.19 toks/s, output: 43.29 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: SQL \n", "\n", "Compare revenues of Apple vs Micrsoft\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 105.18 toks/s, output: 43.28 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Latest revenues of Amazon\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 103.46 toks/s, output: 43.29 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Latest revenues of Sun Pharma in India?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. 
speed input: 104.88 toks/s, output: 43.16 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: OutOfScope \n", "\n", "which cryptocurrency should I buy?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 104.20 toks/s, output: 43.24 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: OutOfScope \n", "\n", "What is Value investing?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 103.68 toks/s, output: 43.20 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: LLM \n", "\n", "What is AWS Bedrock?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.31s/it, est. speed input: 104.23 toks/s, output: 43.25 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "Summarize latest earning call of Nvidia\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 104.94 toks/s, output: 43.18 toks/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: CompanyAnalysis \n", "\n", "How long will the a.i frenzy continue?\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Processed prompts: 100%|██████████| 1/1 [00:02<00:00, 2.32s/it, est. speed input: 105.36 toks/s, output: 43.18 toks/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Category: RAG \n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import torch\n", "from transformers import BertTokenizer, BertForSequenceClassification\n", "import openai\n", "import pandas as pd\n", "import json\n", "import numpy as np\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "# # Initialize OpenAI API client\n", "# OPEN_AI_API_KEY = \"EMPTY\"\n", "# OPENAI_API_BASE_MISTRAL = \"http://0.0.0.0:8000/v1\"\n", "# client_mistral = openai.OpenAI(api_key=OPEN_AI_API_KEY, base_url=OPENAI_API_BASE_MISTRAL)\n", "\n", "# TEXT_LLM_MODEL_MISTRAL = 'mistralai/Mistral-7B-Instruct-v0.2'\n", "\n", "# Load the model and tokenizer\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "model_router = BertForSequenceClassification.from_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models')\n", "tokenizer_router = BertTokenizer.from_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER//Router_Models')\n", "model_router.to(device) # Move the model to the specified device\n", "\n", "# Load the category-to-label mapping\n", "with open('Router_Models/label_to_category.json', 'r') as f:\n", " label_to_category_router = json.load(f)\n", "\n", "\n", "\n", "def predict_router(text):\n", " model_router.eval()\n", " inputs = tokenizer_router(text, return_tensors='pt', padding=True, truncation=True, max_length=128).to(device)\n", " with torch.no_grad():\n", " outputs = model_router(**inputs)\n", " logits = outputs.logits\n", " predicted_class_id = logits.argmax().item()\n", " return label_to_category_router[str(predicted_class_id)]\n", "\n", "def reformulate_query(question):\n", " \"\"\"Reformulate the given question using OpenAI API.\"\"\"\n", " INSTRUCTION = f\"\"\"[INST] Imagine you are an expert & award-winning stock market analyst who specializes in asking the 
right questions relevant for thorough stock research. \n", " Given a user query, generate 3 further precise, expert, and relevant questions. \n", " I want exactly 3 questions. Don't use words like 'GIVEN QUERY' or 'GIVEN INDUSTRY'. \n", " If an industry or sector is provided, try to respond with diverse questions with similar sectors pertinent to financial markets. \n", " If a ratio like MarketCap or ROI is asked in the question, your response should have a slightly different ratio based on your stock market understanding. \n", " The questions should be brief and less than 20 words. If there is a company name mentioned in the question, please use the same company name (with paraphrasing as needed) in related questions.\n", " If the question is asking about non-US stocks, please be explicit in the country name based on your knowledge. \n", " Don't change the intent of related questions, e.g., data-seeking question vs. knowledge-seeking question. QUESTION: {question} Start with similar questions directly without any note or disclaimer.[/INST]\"\"\"\n", "\n", " # response = client_mistral.chat.completions.create(\n", " # model=TEXT_LLM_MODEL_MISTRAL,\n", " # messages=[{\"role\": \"user\", \"content\": INSTRUCTION}],\n", " # max_tokens=100,\n", " # temperature=0.2\n", " # )\n", " # content = response.choices[0].message.content\n", " # reformulated_query = question + \", Related Question:\" + content\n", " \n", " # return reformulated_query\n", "\n", " sampling_params = SamplingParams(temperature=0.2, max_tokens=100)\n", " outputs = vllm_model.generate([INSTRUCTION], sampling_params=sampling_params)\n", " \n", " generated_text = outputs[0].outputs[0].text.strip() # Get the generated text from the model\n", " reformulated_query = question + \", Related Question: \" + generated_text\n", " \n", " return reformulated_query\n", "\n", "# Example usage:\n", "questions = [\"Top 3 companies by market cap?\",\n", " \"Compare revenues of Apple vs Micrsoft\",\n", " \"Latest revenues of Amazon\",\n", " \"Latest revenues of Sun Pharma in India?\",\n", " \"which cryptocurrency should I buy?\",\n", " \"What is Value investing?\",\n", " \"What is AWS Bedrock?\",\"Summarize latest earning call of Nvidia\",\"How long will the a.i frenzy continue?\"]\n", "for text in questions:\n", " print(text)\n", " reformulated_query_llm = reformulate_query(text)\n", " category = predict_router(reformulated_query_llm)\n", " print(f\"Category: {category}\",'\\n')\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Subrouter" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter - Data " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from huggingface_hub import login\n", "import os \n", "import torch\n", "torch.cuda.empty_cache()\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "# Use your Hugging Face token here\n", "login(\"\")\n", "\n", "from vllm import LLM, SamplingParams\n", "\n", "\n", "TEXT_LLM_MODEL_MISTRAL='meta-llama/Meta-Llama-3.1-8B-Instruct'\n", "\n", "\n", "\n", "vllm_model = LLM(\n", " model=TEXT_LLM_MODEL_MISTRAL,\n", " tensor_parallel_size=1, # Use both GPUs\n", " gpu_memory_utilization=0.95, # Lower memory utilization to prevent OOM\n", " max_model_len=26000)\n", " \n", " \n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 
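"outputs": [], "source": [ "# A minimal sanity-check sketch before the full synthetic-data run in the next cell: it sends one\n", "# hand-written prompt through the vLLM model and parses the reply. It assumes `vllm_model` and\n", "# `SamplingParams` from the previous cell, and that the prompt asks for the question wrapped in\n", "# <question>...</question> tags, matching the convention used by the generation cell below.\n", "import re\n", "\n", "test_prompt = ('You are an expert stock market analyst. Generate 1 unique, insightful question '\n", " 'about the fair valuation of a single company. Provide the question in xml tags '\n", " 'i.e. <question>...</question> and nothing else.')\n", "\n", "test_params = SamplingParams(temperature=0.7, max_tokens=100)\n", "test_outputs = vllm_model.generate([test_prompt], test_params)\n", "\n", "raw_text = test_outputs[0].outputs[0].text\n", "match = re.search(r'<question>(.*?)</question>', raw_text, re.DOTALL)\n", "print('Raw completion:', raw_text)\n", "print('Parsed question:', match.group(1).strip() if match else 'NO MATCH - check the prompt/tags')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, 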
"outputs": [], "source": [ "import pandas as pd\n", "import openai\n", "import aiohttp\n", "import asyncio\n", "import nest_asyncio\n", "import re\n", "pd.set_option('max_colwidth', 2400)\n", "\n", "\n", "categories = {\n", " 1: {\n", " \"name\": \"fair valuation\",\n", " \"prompt\": \"As a stock market analyst, generate a set of diverse and comprehensive questions that explore various aspects of determining the fair valuation of a company. These questions should cover methodologies such as discounted cash flow analysis. Ensure the questions address different valuation techniques or whether to buy or sell a stock or fair value of the company or anything about the stock price of a company.\"\n", " },\n", " 2: {\n", " \"name\": \"industry analysis\",\n", " \"prompt\": \"Generate a diverse range of questions aimed at conducting an in-depth analysis of different industries. The questions should cover industry trends, competitive landscape, regulatory impacts, and market opportunities. Include questions that focus on the analysis of industry such as electric vehicle, e-commerce, retail, technology etc.\"\n", " },\n", " 3: {\n", " \"name\": \"single company analysis\",\n", " \"prompt\": \"Create a set of detailed questions that focus on the comprehensive analysis of a single company. These questions should encompass the company's financial health, business model, management effectiveness, market position, and competitive advantages. Ensure the questions cover key financial statements, ratios, and strategic initiatives such as What do you think about Amazon stock.\"\n", " },\n", " 4: {\n", " \"name\": \"multiple companies analysis\",\n", " \"prompt\": \"Generate a variety of questions designed to compare and contrast multiple companies within the same industry or sector. 
These questions should necessarily have more than 1 company to compare and contrast on single or multiple parameters or same industry or different industry.\"\n", " }\n", "}\n", "\n", "# Define example questions for each category\n", "example_questions = {\n", " 1: [\n", " \"Will GME's stock price increase today?\",\n", " \"Abercrombie is trading at $190 per share; is it a good buy?\",\n", " \"What is the intrinsic value of Brookfield Corporation?\",\n", " \"Is Dynagas a good stock for the long run?\",\n", " \"Is Taiwan Semiconductor a good stock for the long run?\",\n", " \"Is it the right time to buy Nvidia stock?\",\n", " \"Why is Tesla considered overrated?\",\n", " \"What is the intrinsic value of Microsoft based on a discounted cash flow analysis?\",\n", " \"How does the price-to-earnings ratio of Amazon compare to its historical average?\",\n", " \"What is the fair value of Tesla based on comparable company analysis?\",\n", " \"How does the enterprise value to EBITDA ratio of Facebook compare to industry peers?\",\n", " \"What is the impact of interest rate changes on the valuation of Apple?\",\n", " \"How does the price-to-sales ratio of Google compare to other tech giants?\",\n", " \"What is the fair value of Netflix using a precedent transactions analysis?\",\n", " \"How do changes in tax policy affect the valuation of Amazon?\",\n", " \"What is the intrinsic value of Uber based on a discounted cash flow analysis?\",\n", " \"How does the price-to-book ratio of Microsoft compare to its historical average?\",\n", " \"What is the fair value of Facebook using a multi-stage growth model?\",\n", " \"How do changes in market conditions impact the valuation of Apple?\",\n", " \"What is the intrinsic value of Tesla based on a residual income model?\",\n", " \"How does the enterprise value to revenue ratio of Amazon compare to its peers?\",\n", " \"What is the fair value of Google using an excess earnings model?\",\n", " \"How do changes in cost of capital affect the valuation of Netflix?\",\n", " \"What is the intrinsic value of Uber based on a real options analysis?\",\n", " \"How does the price-to-cash-flow ratio of Microsoft compare to its historical average?\",\n", " \"What is the fair value of Facebook using an economic profit model?\",\n", " \"How do changes in growth expectations impact the valuation of Tesla?\"\n", " ],\n", " 2: [\n", " \"Will electric vehicles outperform the market this year?\",\n", " \"Analyze retail industry for me\",\n", " \"Is Artificial Intelligence a hype?\",\n", " \"What are the major trends in the e-commerce industry?\",\n", " \"How has the renewable energy sector evolved over the past decade?\",\n", " \"What are the key drivers of growth in the biotechnology industry?\",\n", " \"How has the competitive landscape changed in the automotive industry?\",\n", " \"What are the regulatory challenges facing the pharmaceutical industry?\",\n", " \"How has the rise of fintech impacted the financial services sector?\",\n", " \"What are the growth prospects for the telecommunications industry?\",\n", " \"How has the adoption of AI affected the technology sector?\",\n", " \"What are the key challenges facing the retail industry?\",\n", " \"How has the global supply chain crisis impacted the manufacturing industry?\",\n", " \"What are the major trends in the healthcare industry?\",\n", " \"How has the rise of streaming services affected the media industry?\",\n", " \"What are the key opportunities in the electric vehicle market?\",\n", " \"How has the COVID-19 
pandemic impacted the travel and tourism industry?\",\n", " \"What are the growth drivers for the cybersecurity industry?\",\n", " \"How has the real estate market evolved in recent years?\",\n", " \"What are the key factors influencing the oil and gas industry?\",\n", " \"How has the shift to remote work impacted the tech industry?\",\n", " \"What are the major challenges facing the agriculture industry?\",\n", " \"How has the rise of social media influenced the advertising industry?\"\n", " ],\n", " 3: [\n", " \"What are the key risks for HRBR?\",\n", " \"Analyse the financial health of CVS.\",\n", " \"What is the revenue trend for Arch Resources?\",\n", " \"Is Amazon a good buy?\",\n", " \"Tell me about Amazon's business model.\",\n", " \"Analyse the latest earnings call of Meta.\",\n", " \"How have Amazon's focus areas changed?\",\n", " \"What are the key risks investing in Nvidia?\",\n", " \"What are the key risks investing in Starbucks?\",\n", " \"What is Starbucks' revenue breakdown?\",\n", " \"What is Apple's revenue breakdown?\",\n", " \"What is the ROI of Starbucks?\",\n", " \"Provide an overview of BYD.\",\n", " \"What are the key financial ratios for evaluating Apple's performance?\",\n", " \"How has Tesla's market share evolved over the past five years?\",\n", " \"What are the major revenue streams for Alphabet (Google)?\",\n", " \"What is the impact of recent regulatory changes on Facebook's business?\",\n", " \"How does Microsoft's cloud business compare to its other segments?\",\n", " \"What are the growth prospects for Netflix in international markets?\",\n", " \"What are the key risks associated with investing in Uber?\",\n", " \"How has Starbucks' expansion strategy impacted its financial performance?\",\n", " \"What are the main factors driving Amazon's profitability?\",\n", " \"How has Disney's acquisition strategy affected its financial health?\",\n", " \"What are the key financial metrics for evaluating IBM's performance?\",\n", " \"How has Twitter's user growth impacted its revenue?\",\n", " \"What are the strategic initiatives of Coca-Cola to maintain market leadership?\",\n", " \"How has Pfizer's R&D spending influenced its product pipeline?\",\n", " \"What are the major cost components for Walmart?\",\n", " \"How does Apple's product diversification impact its revenue stability?\",\n", " \"What are the competitive advantages of Nvidia in the semiconductor industry?\",\n", " \"How has Tesla's focus on autonomous driving technology affected its market position?\",\n", " \"What are the financial implications of Google's recent acquisitions?\",\n", " \"How has Microsoft's focus on subscription services impacted its revenue?\"\n", " ],\n", " 4: [\n", " \"Compare the fundamentals of AMD and Intel over the last 5 years.\",\n", " \"How does Nvidia's profitability compare to other semiconductor companies?\",\n", " \"Costco Vs Walmart\",\n", " \"How do the financial metrics of Apple compare to those of Samsung?\",\n", " \"What are the key differences in business models between Amazon and Alibaba?\",\n", " \"How does the profitability of Google compare to Facebook?\",\n", " \"What are the growth prospects for Netflix compared to Disney+?\",\n", " \"How does the market share of Ford compare to General Motors?\",\n", " \"What are the key financial metrics for comparing Coca-Cola and PepsiCo?\",\n", " \"How does the revenue growth of Microsoft compare to IBM?\",\n", " \"What are the major differences in cost structures between Walmart and Target?\",\n", " \"How does the 
dividend policy of Verizon compare to AT&T?\",\n", " \"What are the key financial ratios for comparing ExxonMobil and Chevron?\",\n", " \"How does the R&D spending of Intel compare to AMD?\",\n", " \"What are the growth strategies of Uber compared to Lyft?\",\n", " \"How does the market capitalization of Tesla compare to Ford?\",\n", " \"What are the key differences in revenue streams between Netflix and Hulu?\",\n", " \"How does the financial performance of Visa compare to Mastercard?\",\n", " \"What are the competitive advantages of Nike compared to Adidas?\",\n", " \"How does the profitability of Procter & Gamble compare to Unilever?\",\n", " \"What are the key financial metrics for comparing JPMorgan Chase and Goldman Sachs?\",\n", " \"How does the market share of McDonald's compare to Burger King?\",\n", " \"What are the major differences in business strategies between Apple and Microsoft?\"\n", " ]\n", "}\n", "\n", "df_list=[]\n", "for category_id, details in categories.items():\n", " for example_question in example_questions[category_id]:\n", " for _ in range(100): # Repeat 100 times per example question\n", " prompt = f\"\"\" Category: {category_id}. You are an expert stock market analyst tasked with creating questions for training a model. Here are some example questions for reference:\\n\\n{example_question}\\n\\nNow, generate 1 unique, diverse, and insightful question specifically for {details['name']}. Ensure the question provides significant analytical depth and covers the key aspects outlined. Start generating the questions directly without any preamble. Please remember that you have to generate just one question and stop your response once you have provided the question. Provide the question in xml tags i.e. <question>...</question>\"\"\"\n", " df_list.append({'Category': details['name'], 'Prompt': prompt})\n", "\n", "# Create a DataFrame from the list\n", "full_df = pd.DataFrame(df_list)\n", "\n", "prompts=[]\n", "all_questions=full_df['Prompt'].values.tolist()\n", "\n", "for question in all_questions:\n", " prompts.append(question)\n", "\n", "sampling_params = SamplingParams(\n", " temperature=0.7,\n", " max_tokens=100\n", " )\n", "\n", "outputs = vllm_model.generate(prompts, sampling_params)\n", "\n", "llm_output=[]\n", "for output in outputs:\n", " try:\n", " generated_text = output.outputs[0].text\n", " match = re.search(r'<question>(.*?)</question>', generated_text, re.DOTALL)\n", " question_text = match.group(1).strip() # Strips any leading/trailing whitespace\n", "\n", "\n", " llm_output.append(question_text)\n", " except:\n", " llm_output.append('')\n", "\n", "\n", "full_df['Response'] = llm_output\n", "display(full_df)\n", "full_df=full_df[full_df['Response']!='']\n", "full_df.to_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Combined_Ady_Database.csv', index=False)\n", "# print(\"Synthetic questions dataset created and saved to 'Ady_Database4.csv'.\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter - Model" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. 
This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n", "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n", "A parameter name that contains `gamma` will be renamed internally to `weight`. 
Please use a different name to suppress this warning.\n", "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-large-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", " warnings.warn(\n", "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n", "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n", "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33m19aditiyadav\u001b[0m (\u001b[33m19aditiyadav-stockbuzz\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" ] }, { "data": { "text/html": [ "wandb version 0.18.0 is available! To upgrade, please run:\n", " $ pip install wandb --upgrade" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Tracking run with wandb version 0.17.7" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/ady/StockBuzz_Experiments/AgentsTest/NER/wandb/run-20240915_202121-m4a5bz9k" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run ./results to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/19aditiyadav-stockbuzz/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/19aditiyadav-stockbuzz/huggingface/runs/m4a5bz9k" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [310/310 05:22, Epoch 5/5]\n", "
\n",
"      Epoch | Training Loss | Validation Loss | Accuracy | Precision | Recall   | F1\n",
"      1     | 0.948000      | 0.775363        | 0.886082 | 0.893503  | 0.886082 | 0.887385\n",
"      2     | 0.235800      | 0.169679        | 0.959794 | 0.960594  | 0.959794 | 0.959883\n",
"      3     | 0.152600      | 0.123418        | 0.964948 | 0.965072  | 0.964948 | 0.964948\n",
"      4     | 0.086800      | 0.145899        | 0.963918 | 0.964666  | 0.963918 | 0.963899\n",
"      5     | 0.076000      | 0.161085        | 0.962371 | 0.963215  | 0.962371 | 0.962410\n",
"
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "('SubRouter_Models/tokenizer_config.json',\n", " 'SubRouter_Models/special_tokens_map.json',\n", " 'SubRouter_Models/vocab.txt',\n", " 'SubRouter_Models/added_tokens.json')" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import numpy as np\n", "import torch\n", "import torch.nn as nn\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n", "from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback\n", "import gc\n", "import os\n", "import json\n", "\n", "import os\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", "\n", "\n", "\n", "# Set device to GPU 0 if available\n", "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "# Clear GPU memory\n", "torch.cuda.empty_cache()\n", "gc.collect()\n", "\n", "# Load the generated CSV\n", "df = pd.read_csv(\"/home/ady/Stockbuzz.ai_Data/RouterNER/Combined_Ady_Database.csv\")\n", "df = df.sample(n=min(10000,len(df)))\n", "\n", "df.rename(columns={'Category': 'category', 'Response': 'Questions'}, inplace=True)\n", "\n", "# Create the category-to-label mapping\n", "df['label'] = df['category'].astype('category').cat.codes\n", "label_to_category = dict(enumerate(df['category'].astype('category').cat.categories))\n", "category_to_label = {v: k for k, v in label_to_category.items()}\n", "\n", "# Save the mapping to a JSON file in the model's directory\n", "os.makedirs('SubRouter_Models', exist_ok=True)\n", "with open('SubRouter_Models/label_to_category.json', 'w') as f:\n", " json.dump(label_to_category, f)\n", "with open('SubRouter_Models/category_to_label.json', 'w') as f:\n", " json.dump(category_to_label, f)\n", "\n", "# Split the dataset into training and validation sets based on unique 'Questions'\n", "unique_questions = df['Questions'].unique().tolist()\n", "train_size = int(0.8 * len(unique_questions))\n", "val_size = int(0.2 * len(unique_questions))\n", "\n", "train_questions = unique_questions[:train_size]\n", "val_questions = unique_questions[train_size:train_size + val_size]\n", "\n", "train_df = df[df['Questions'].isin(train_questions)]\n", "val_df = df[df['Questions'].isin(val_questions)]\n", "\n", "# Initialize the BERT tokenizer and model\n", "checkpoint=\"google-bert/bert-large-uncased\"\n", "tokenizer_subrouter = BertTokenizer.from_pretrained(checkpoint)\n", "model_subrouter = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(df['category'].unique()))\n", "model_subrouter.to(device) # Move the model to the specified device\n", "\n", "# Tokenize the datasets\n", "def tokenize_data(data):\n", " return tokenizer_subrouter(data['Questions'].astype(str).tolist(), padding=True, truncation=True, max_length=64, return_tensors='pt')\n", "\n", "train_tokens = tokenize_data(train_df)\n", "val_tokens = tokenize_data(val_df)\n", "\n", "train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)\n", "val_labels = torch.tensor(val_df['label'].values, dtype=torch.long)\n", "\n", "# Create custom Dataset class\n", "class ArticleDataset(torch.utils.data.Dataset):\n", " def __init__(self, encodings, labels):\n", " self.encodings = encodings\n", " self.labels = labels\n", "\n", " def __getitem__(self, idx):\n", " item = {key: val[idx] for key, val in 
self.encodings.items()}\n", " item['labels'] = self.labels[idx]\n", " return item\n", "\n", " def __len__(self):\n", " return len(self.labels)\n", "\n", "train_dataset = ArticleDataset(train_tokens, train_labels)\n", "val_dataset = ArticleDataset(val_tokens, val_labels)\n", "\n", "# Define the compute_metrics function for evaluation\n", "def compute_metrics(pred):\n", " labels = pred.label_ids\n", " preds = pred.predictions.argmax(-1)\n", " precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n", " acc = accuracy_score(labels, preds)\n", " return {\n", " 'accuracy': acc,\n", " 'precision': precision,\n", " 'recall': recall,\n", " 'f1': f1,\n", " }\n", "\n", "# Define the training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " num_train_epochs=5,\n", " per_device_train_batch_size=128,\n", " per_device_eval_batch_size=128,\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " evaluation_strategy=\"epoch\",\n", " save_strategy=\"epoch\",\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", " no_cuda=False,\n", " dataloader_pin_memory=False,\n", ")\n", "\n", "# Define the trainer\n", "trainer = Trainer(\n", " model=model_subrouter,\n", " args=training_args,\n", " train_dataset=train_dataset,\n", " eval_dataset=val_dataset,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model and tokenizer to the specified directory\n", "model_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n", "tokenizer_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "SubRouter Inference" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Question: Compare depreciation ttm of amazon and intol\n", "Predicted Category: multiple companies analysis\n", "\n", "Question: What is the current market trend for technology stocks?\n", "Predicted Category: industry analysis\n", "\n", "Question: How did shakti pumps perform in the last quarter?\n", "Predicted Category: single company analysis\n", "\n", "Question: Compare Amazon vs Apple in terms of revenues\n", "Predicted Category: multiple companies analysis\n", "\n", "Question: What is the reasonable price of Nvidia?\n", "Predicted Category: fair valuation\n", "\n", "Question: Compare Apple vs Micrsoft\n", "Predicted Category: multiple companies analysis\n", "\n" ] } ], "source": [ "import torch\n", "from transformers import BertTokenizer, BertForSequenceClassification\n", "import json\n", "\n", "# Load the model and tokenizer\n", "device = torch.device(\"cuda\")\n", "model_subrouter = BertForSequenceClassification.from_pretrained('./SubRouter_Models')\n", "tokenizer_subrouter = BertTokenizer.from_pretrained('./SubRouter_Models')\n", "model_subrouter.to(device) # Move the model to the specified device\n", "\n", "# Load the category-to-label mapping\n", "with open('SubRouter_Models/label_to_category.json', 'r') as f:\n", " label_to_category_subrouter = json.load(f)\n", "\n", "def predict_subrouter(text):\n", " model_subrouter.eval()\n", " inputs = tokenizer_subrouter(text, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)\n", " with torch.no_grad():\n", " 
outputs = model_subrouter(**inputs)\n", " logits = outputs.logits\n", " predicted_class_id = logits.argmax().item()\n", " return label_to_category_subrouter[str(predicted_class_id)]\n", "\n", "# Example usage:\n", "questions = [\"Compare depreciation ttm of amazon and intol\",\n", " \"What is the current market trend for technology stocks?\",\n", " \"How did shakti pumps perform in the last quarter?\",\n", " \"Compare Amazon vs Apple in terms of revenues\",\n", " \"What is the reasonable price of Nvidia?\",\n", " \"Compare Apple vs Micrsoft\"]\n", "for text in questions:\n", " category = predict_subrouter(text)\n", " print(f\"Question: {text}\\nPredicted Category: {category}\\n\")\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: fineGrained).\n", "Your token has been saved to /home/ady/.cache/huggingface/token\n", "Login successful\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-09-15 21:07:20,332\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "INFO 09-15 21:07:20 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=26000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Instruct, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n", "INFO 09-15 21:07:21 model_runner.py:997] Starting to load model meta-llama/Meta-Llama-3.1-8B-Instruct...\n", "INFO 09-15 21:07:22 weight_utils.py:242] Using model weights format ['*.safetensors']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00 18\u001b[0m vllm_model \u001b[38;5;241m=\u001b[39m \u001b[43mLLM\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTEXT_LLM_MODEL_MISTRAL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensor_parallel_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Use both GPUs\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mgpu_memory_utilization\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.95\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Lower memory utilization to prevent OOM\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_model_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m26000\u001b[39;49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:178\u001b[0m, in \u001b[0;36mLLM.__init__\u001b[0;34m(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, **kwargs)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThere is no need to pass vision-related arguments anymore.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 156\u001b[0m engine_args \u001b[38;5;241m=\u001b[39m EngineArgs(\n\u001b[1;32m 157\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 158\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 177\u001b[0m )\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine \u001b[38;5;241m=\u001b[39m \u001b[43mLLMEngine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_engine_args\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mUsageContext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mLLM_CLASS\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter \u001b[38;5;241m=\u001b[39m Counter()\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:550\u001b[0m, in \u001b[0;36mLLMEngine.from_engine_args\u001b[0;34m(cls, engine_args, usage_context, stat_loggers)\u001b[0m\n\u001b[1;32m 548\u001b[0m executor_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_executor_cls(engine_config)\n\u001b[1;32m 549\u001b[0m \u001b[38;5;66;03m# Create the LLM engine.\u001b[39;00m\n\u001b[0;32m--> 550\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mengine_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutor_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecutor_class\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_stats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43mengine_args\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdisable_log_stats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musage_context\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mstat_loggers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstat_loggers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 558\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m engine\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:331\u001b[0m, in \u001b[0;36mLLMEngine.__init__\u001b[0;34m(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers, input_registry)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_executor \u001b[38;5;241m=\u001b[39m executor_class(\n\u001b[1;32m 318\u001b[0m model_config\u001b[38;5;241m=\u001b[39mmodel_config,\n\u001b[1;32m 319\u001b[0m cache_config\u001b[38;5;241m=\u001b[39mcache_config,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 327\u001b[0m observability_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config,\n\u001b[1;32m 328\u001b[0m )\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39membedding_mode:\n\u001b[0;32m--> 331\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_kv_caches\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;66;03m# If usage stat is enabled, collect relevant info.\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_usage_stats_enabled():\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:473\u001b[0m, in \u001b[0;36mLLMEngine._initialize_kv_caches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_gpu_blocks \u001b[38;5;241m=\u001b[39m num_gpu_blocks\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[0;32m--> 473\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/gpu_executor.py:125\u001b[0m, in \u001b[0;36mGPUExecutor.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;66;03m# NOTE: This is logged in the executor because there can be >1 worker\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# with other executors. 
We could log in the engine level, but work\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# remains to abstract away the device for non-GPU configurations.\u001b[39;00m\n\u001b[1;32m 122\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m# GPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m, # CPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, num_gpu_blocks,\n\u001b[1;32m 123\u001b[0m num_cpu_blocks)\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:266\u001b[0m, in \u001b[0;36mWorker.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_cache_engine()\n\u001b[0;32m--> 266\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_warm_up_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:282\u001b[0m, in \u001b[0;36mWorker._warm_up_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_warm_up_model\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39menforce_eager:\n\u001b[0;32m--> 282\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgpu_cache\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;66;03m# Reset the seed to ensure that the random state is not affected by\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;66;03m# the model initialization and profiling.\u001b[39;00m\n\u001b[1;32m 285\u001b[0m set_random_seed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mseed)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1422\u001b[0m, in \u001b[0;36mGPUModelRunnerBase.capture_model\u001b[0;34m(self, kv_caches)\u001b[0m\n\u001b[1;32m 1415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_seqlen_agnostic:\n\u001b[1;32m 1416\u001b[0m \u001b[38;5;66;03m# Only used by Mamba-based models CUDA graph atm (Jamba)\u001b[39;00m\n\u001b[1;32m 1417\u001b[0m capture_inputs\u001b[38;5;241m.\u001b[39mupdate({\n\u001b[1;32m 1418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseqlen_agnostic_capture_inputs\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1419\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mget_seqlen_agnostic_capture_inputs(\n\u001b[1;32m 1420\u001b[0m batch_size)\n\u001b[1;32m 1421\u001b[0m })\n\u001b[0;32m-> 1422\u001b[0m \u001b[43mgraph_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcapture_inputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_memory_pool \u001b[38;5;241m=\u001b[39m graph_runner\u001b[38;5;241m.\u001b[39mgraph\u001b[38;5;241m.\u001b[39mpool()\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_runners[virtual_engine][batch_size] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1425\u001b[0m graph_runner)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1665\u001b[0m, in \u001b[0;36mCUDAGraphRunner.capture\u001b[0;34m(self, input_ids, positions, hidden_or_intermediate_states, intermediate_inputs, kv_caches, attn_metadata, memory_pool, stream, **kwargs)\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[38;5;66;03m# Run the model a few times without capturing the graph.\u001b[39;00m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;66;03m# This is to make sure that the captured graph does not include the\u001b[39;00m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;66;03m# kernel launches for initial benchmarking (e.g., Triton autotune).\u001b[39;00m\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;66;03m# Note one iteration is not enough for torch.jit.script\u001b[39;00m\n\u001b[1;32m 1664\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(_NUM_WARMUP_ITERS):\n\u001b[0;32m-> 1665\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1666\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1667\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1668\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1669\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1670\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1671\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1672\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1673\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39msynchronize()\n\u001b[1;32m 1675\u001b[0m \u001b[38;5;66;03m# Capture the graph.\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:448\u001b[0m, in 
\u001b[0;36mLlamaForCausalLM.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 442\u001b[0m input_ids: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 446\u001b[0m intermediate_tensors: Optional[IntermediateTensors] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 447\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Union[torch\u001b[38;5;241m.\u001b[39mTensor, IntermediateTensors]:\n\u001b[0;32m--> 448\u001b[0m model_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_output\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:329\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors, inputs_embeds)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstart_layer, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mend_layer):\n\u001b[1;32m 328\u001b[0m layer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlayers[i]\n\u001b[0;32m--> 329\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 330\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 331\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_layer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mresidual\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m IntermediateTensors({\n\u001b[1;32m 339\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhidden_states\u001b[39m\u001b[38;5;124m\"\u001b[39m: hidden_states,\n\u001b[1;32m 340\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresidual\u001b[39m\u001b[38;5;124m\"\u001b[39m: residual\n\u001b[1;32m 341\u001b[0m })\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:261\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, positions, hidden_states, kv_cache, attn_metadata, residual)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n\u001b[1;32m 259\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(\n\u001b[1;32m 260\u001b[0m hidden_states, residual)\n\u001b[0;32m--> 261\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states, residual\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:87\u001b[0m, in \u001b[0;36mLlamaMLP.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m---> 87\u001b[0m gate_up, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgate_up_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mact_fn(gate_up)\n\u001b[1;32m 89\u001b[0m x, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdown_proj(x)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:367\u001b[0m, in \u001b[0;36mColumnParallelLinear.forward\u001b[0;34m(self, input_)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;66;03m# Matrix multiply.\u001b[39;00m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquant_method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 367\u001b[0m output_parallel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_method\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgather_output:\n\u001b[1;32m 369\u001b[0m \u001b[38;5;66;03m# All-gather across the partitions.\u001b[39;00m\n\u001b[1;32m 370\u001b[0m output \u001b[38;5;241m=\u001b[39m tensor_model_parallel_all_gather(output_parallel)\n", "File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:135\u001b[0m, in \u001b[0;36mUnquantizedLinearMethod.apply\u001b[0;34m(self, layer, x, bias)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 131\u001b[0m layer: 
torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mModule,\n\u001b[1;32m 132\u001b[0m x: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[1;32m 133\u001b[0m bias: Optional[torch\u001b[38;5;241m.\u001b[39mTensor] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.43 GiB of which 16.69 MiB is free. Including non-PyTorch memory, this process has 47.38 GiB memory in use. Of the allocated memory 46.94 GiB is allocated by PyTorch, with 33.31 MiB allocated in private pools (e.g., CUDA Graphs), and 41.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)" ] } ], "source": [ "from huggingface_hub import login\n", "import os \n", "import torch\n", "torch.cuda.empty_cache()\n", "\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "# Use your Hugging Face token here\n", "login(\"\")\n", "\n", "from vllm import LLM, SamplingParams\n", "\n", "\n", "TEXT_LLM_MODEL_MISTRAL='meta-llama/Meta-Llama-3.1-8B-Instruct'\n", "\n", "\n", "\n", "vllm_model = LLM(\n", " model=TEXT_LLM_MODEL_MISTRAL,\n", " tensor_parallel_size=1, # Use both GPUs\n", " gpu_memory_utilization=0.95, # Lower memory utilization to prevent OOM\n", " max_model_len=26000)\n", " \n", " \n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Data Creation" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Collecting matplotlib\n", " Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n", "Collecting contourpy>=1.0.1 (from matplotlib)\n", " Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.4 kB)\n", "Collecting cycler>=0.10 (from matplotlib)\n", " Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n", "Collecting fonttools>=4.22.0 (from matplotlib)\n", " Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (162 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.6/162.6 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)\n", " Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)\n", "Requirement already satisfied: numpy>=1.23 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (24.1)\n", 
"Requirement already satisfied: pillow>=8 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (10.4.0)\n", "Collecting pyparsing>=2.3.1 (from matplotlib)\n", " Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)\n", "Requirement already satisfied: python-dateutil>=2.7 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (2.9.0)\n", "Requirement already satisfied: six>=1.5 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n", "Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.3/8.3 MB\u001b[0m \u001b[31m99.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.2/323.2 kB\u001b[0m \u001b[31m134.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n", "Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m100.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", "\u001b[?25hDownloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m122.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading pyparsing-3.1.4-py3-none-any.whl (104 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.1/104.1 kB\u001b[0m \u001b[31m547.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: pyparsing, kiwisolver, fonttools, cycler, contourpy, matplotlib\n", "Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.53.1 kiwisolver-1.4.7 matplotlib-3.9.2 pyparsing-3.1.4\n" ] } ], "source": [ "!pip install matplotlib\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", "To disable this warning, you can either:\n", "\t- Avoid using `tokenizers` before the fork if possible\n", "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Collecting seqeval\n", " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m249.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Preparing metadata (setup.py) ... 
\u001b[?25ldone\n", "\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.26.4)\n", "Requirement already satisfied: scikit-learn>=0.21.3 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.5.1)\n", "Requirement already satisfied: scipy>=1.6.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.14.1)\n", "Requirement already satisfied: joblib>=1.2.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n", "Building wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... \u001b[?25ldone\n", "\u001b[?25h Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=3ad02021d3334b570a31b1504d9d0c4da3569f9ff09c9efb6da8b2b37276ac87\n", " Stored in directory: /tmp/pip-ephem-wheel-cache-a6hygjhv/wheels/bc/92/f0/243288f899c2eacdfa8c5f9aede4c71a9bad0ee26a01dc5ead\n", "Successfully built seqeval\n", "Installing collected packages: seqeval\n", "Successfully installed seqeval-1.2.2\n" ] } ], "source": [ "!pip install seqeval\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import aiohttp\n", "import asyncio\n", "import nest_asyncio\n", "import random\n", "import re\n", "pd.set_option('max_colwidth', 2400)\n", "\n", "# # Apply the nest_asyncio patch\n", "# nest_asyncio.apply()\n", "\n", "# # Set environment variables\n", "# OPEN_AI_API_KEY = \"EMPTY\" # Replace with your actual API key\n", "# OPENAI_API_BASE_MIXTRAL = os.getenv(\"OPENAI_API_BASE_MIXTRAL\", \"http://0.0.0.0:8004/v1\")\n", "# TEXT_LLM_MODEL_MIXTRAL = 'cognitivecomputations/dolphin-2.8-mistral-7b-v02'\n", "\n", "# List of metrics to be included in questions\n", "metrics = [\n", " \"Return on Capital Employed\",\n", " \"Debt ratio\",\n", " \"Debt-Equity ratio\",\n", " \"Interest Coverage\",\n", " \"Dividend Payout ratio\",\n", " \"PE Ratio\",\n", " \"PE to Growth ratio\",\n", " \"Dividend yield\",\n", " \"Enterprise Value Multiple\",\n", " \"Dividend yield TTM\",\n", " \"Dividend yield percentage TTM\",\n", " \"PE ratio TTM\",\n", " \"PEF ratio TTM\",\n", " \"Current ratio TTM\",\n", " \"Quick ratio TTM\",\n", " \"Gross Profit Margin TTM\",\n", " \"Operating Profit Margin TTM\",\n", " \"Net-Profit Margin TTM\",\n", " \"Return on Assets TTM\",\n", " \"Return on Equity TTM\",\n", " \"Return on Capital Employed TTM\",\n", " \"Debt ratio TTM\",\n", " \"Debt-Equity ratio TTM\",\n", " \"Interest Coverage TTM\",\n", " \"PE ratio TTM\",\n", " \"PEG ratio TTM\",\n", " \"Price to Sales ratio TTM\",\n", " \"Price to Fair-Value TTM\",\n", " \"Dividend per share TTM\",\n", " \"Revenue growth\",\n", " \"EBITDA growth\",\n", " \"Net-Income growth\",\n", " \"Growth in Net-Income-ratio\",\n", " \"EPS growth\",\n", " \"Total-Debt growth\",\n", " \"NetDebt growth\",\n", " \"Altman Z Score\",\n", " \"Piotroski Score\",\n", " \"Working Capital\",\n", " \"Total Assets\",\n", " \"Retained Earnings\",\n", " \"EBIT\",\n", " \"Total Liabilities\"\n", "]\n", "\n", "# List of entities (companies) to be included in questions\n", "entities = [\n", " \"Amazon\", \"Apple\", \"Microsoft\", \"Google\", \"Facebook\", \"Nvidia\",\n", " 
\"Meta\", \"Tesla\", \"Broadcom\", \"ASML\", \"Costco\", \"Cisco\", \"Intel\", \"Adobe\"\n", "]\n", "\n", "# New list of example questions\n", "example_questions = [\n", " \"What’s is the P/E of CVS compared to competitors\",\n", " \"Is abercrombie still a good buy\",\n", " \"Analyse CVS for me\",\n", " \"How does Arch Resources' return on equity compare to its competitors in the coal mining sector?\",\n", " \"What is the intrinsic value of Brookfield corporation\",\n", " \"What is the P/E ratio of Brookfield Corporation?\",\n", " \"Which companies, similar to Amazon, have a significant presence in both e-commerce and cloud computing services?\",\n", " \"Which tech-driven businesses, akin to Amazon, provide extensive e-commerce solutions and operate successful cloud computing divisions?\",\n", " \"Is Dynagas good stock for long run\",\n", " \"Is Taiwan semiconductor good stock in the long run\",\n", " \"Will Electric Vehicles outperform the market this year?\",\n", " \"Analyse latest earning call of Meta\",\n", " \"How does QSR.TO's market capitalization compare to its peers in the restaurant sector?\",\n", " \"How have Amazon's focus areas changed?\",\n", " \"What are the key risks investing in nvidia\",\n", " \"What is the ROI of Starbucks?\",\n", " \"What do you think of ticker LNG?\",\n", " \"Analyze CDW\",\n", " \"Is it a right time to buy Nvidia stock?\",\n", " \"Levi stock\",\n", " \"Tell me about TSLA but talk like a pirate\",\n", " \"What about BYD?\",\n", " \"What is Starbucks' return on equity (ROE) compared to its competitors in the food and beverage sector?\",\n", " \"What is McDonald's ROE compared to Starbucks in the food and beverage sector?\",\n", " \"How does Starbucks' ROE compare to Dunkin' Donuts in the coffee and bakery sector?\",\n", " \"Why the company DoubleVerify Holdings shares fell?\",\n", " \"What are the latest news for AMD?\",\n", " \"What is the latest close price of NVDA?\",\n", " \"Why is Tesla so overrated?\",\n", " \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n", " \"What are the consensus EPS forecasts for PYPL for the next 3 financial years?\",\n", " \"What is PYPL EPS in FY24, FY25 and FY26?\",\n", " \"Give me a chart of coke's P/E ratio for the last 5 years\",\n", " \"What is the stock price of Apple?\",\n", " \"Compare AMD and intel fundamentals for the last 5 years\",\n", " \"Compare the stocks of AMD and intel based on their fundamentals for the last 5 years\",\n", " \"What is the average return on equity (ROE) for AMD and Intel over the last 5 years?\",\n", " \"Do a sentiment comparison between intel and amd for the last 2 years\",\n", " \"How the options are looking for both amd and intel for the last 10 trading days\",\n", " \"How has the price-to-earnings ratio (P/E) for AMD and Intel compared over the last 10 trading days?\",\n", " \"Why intel price dropped during 2021 and 2022?\",\n", " \"How is ASTS?\",\n", " \"Is MTCH revenue growing?\",\n", " \"Tell me about amazon\",\n", " \"Will GME go up or go down today?\",\n", " \"What are the key factors influencing GME's stock price movement?\",\n", " \"Berkshire b vs voo\",\n", " \"Compare Berkshire b with voo for past 20 years performance and report me\",\n", " \"What are your thoughts on moneylion?\",\n", " \"Akon\",\n", " \"Predictions on NVIDIA for tomorrow\",\n", " \"Value of oracle\",\n", " \"Tell me about OKLA\",\n", " \"Is ASTS a good investment?\",\n", " \"Is smci good? 
why or why not\",\n", " \"Should I buy accenture?\",\n", " \"What is Accenture's return on equity (ROE) compared to its competitors in the IT services industry?\",\n", " \"Should I invest in Intel intc?\",\n", " \"What do you think about the ticker ENPH?\",\n", " \"What is status of apple stocks?\",\n", " \"Should I buy more apple?\",\n", " \"What is the average growth rate of HD revenue?\",\n", " \"What is the average growth rate of Home Depot revenue?\",\n", " \"I have been tracking the share price of MSI (Motorola Solutions). What are the predictions for the next 6 months?\",\n", " \"How's uber doing lately?\",\n", " \"Show me the revenue, income, price graphs for Uber\",\n", " \"Is ANGELONE a good stock to invest in?\",\n", " \"Nvidia metrics and comparables\",\n", " \"How does Nvidia's profitability compare to other semiconductor companies?\",\n", " \"How does Nvidia's market capitalization compare to other semiconductor companies?\",\n", " \"What do you think about hapag loyd?\",\n", " \"Amazon latest results\",\n", " \"Can you give me important informations for cloudlfare?\",\n", " \"What is MSFT last 3 year revenue?\",\n", " \"What is the revenue growth rate of MSFT in the last 3 years?\",\n", " \"What is asts?\",\n", " \"Future of polestar\",\n", " \"What are some key metrics of nvidia over the past few years?\",\n", " \"What is Nvidia's ROE and ROA over the past few years?\",\n", " \"Is ford a good investment right now?\",\n", " \"What are the prospects of growth for paypal?\",\n", " \"Would Costco be a good investment right now?\",\n", " \"Costco vs wallmart\",\n", " \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n", " \"How would you value costco?\",\n", " \"What is Costco's price-to-earnings ratio compared to other discount retailers?\",\n", " \"How is Ocugen performing?\",\n", " \"I meant OCGN\",\n", " \"What was Apple’s revenue last year?\",\n", " \"What is the outlook for ARM stock?\",\n", " \"What is the outlook for ARM stock?\",\n", " \"Outlook for giig stock in 2025\",\n", " \"Is SLS a good buy right now?\",\n", " \"Is AMD a good buy right now?\",\n", " \"What is happening with Tesla stock?\",\n", " \"What is the short interest on WIRE?\",\n", " \"What is the market cap of WIRE?\",\n", " \"Is AMD good to be invested now or should I vest my RSU now?\",\n", " \"What are the growth prospects for Intel in the semiconductor industry?\",\n", " \"NVDA\",\n", " \"Analyze the stock shw\",\n", " \"Zts\",\n", " \"Analyze zts\",\n", " \"Tell me about the outlook of The Qt Company, QTCOM\",\n", " \"What is the average revenue growth rate of The Qt Company in the past five years compared to its competitors in the software development industry?\",\n", " \"META vs MSFT vs NVDA?\",\n", " \"NVDA\",\n", " \"How does NVDA's market capitalization compare to its peers in the semiconductor industry?\",\n", " \"What is JPM’s net interest margin over the last 10 years?\",\n", " \"How was Q1 of Smart Sand compared to Q4 2023?\",\n", " \"Price target for NVDA?\",\n", " \"What is the price target for NVIDIA's competitors like AMD and Intel?\",\n", " \"Is lulu a good buy?\",\n", " \"Is nvidia worth buying?\",\n", " \"What is the market cap of Nvidia compared to its competitors?\",\n", " \"What stock trend is predicted for nvda?\",\n", " \"Should I buy calls on NVDA for December 2024?\",\n", " \"What do you think about SQQQ?\",\n", " \"What are the main competitors to crocs inc.?\",\n", " \"What is the long term debt of walgreen boots 
alliance (WBA)?\",\n", " \"What is the long term debt of CVS Health (CVS)?\",\n", " \"What is future of DAX40?\",\n", " \"What's the intrinsic value of nvidia?\",\n", " \"What is the market capitalization of Nvidia's competitors?\",\n", " \"What is the intrinsic value of Amazon?\",\n", " \"What is the intrinsic value of Walmart?\",\n", " \"What is Linde dcf value?\",\n", " \"What can you tell me about linde?\",\n", " \"What can you tell me about evvty (evolution ab)?\",\n", " \"Give me the evvty important numbers\",\n", " \"EVVTY Analysis\",\n", " \"How does EVVTY's ROE compare to its peers in the industry?\",\n", " \"What is quarterly sales and earnings of fiserv for last 2 years?\",\n", " \"What is the average quarterly sales and earnings growth rate of Fiserv and its competitors in the financial technology industry over the last 2 years?\",\n", " \"Calculate the price action movement for the last 30 days for google\",\n", " \"What are the top competitors of Albemarle in the chemical industry?\",\n", " \"What is nvidia?\",\n", " \"What is Nvidia's primary business?\",\n", " \"Current risk in Ulta stock?\",\n", " \"What is the current risk in Ulta's competitors' stocks?\",\n", " \"What is the forecast of Nano one materials?\"\n", "]\n", "\n", "\n", "df_list=[]\n", "for metric in metrics:\n", " for entity in entities:\n", " for example_question in example_questions:\n", " for _ in range(3): # Repeat 10 times\n", " prompt = f\"\"\" You are an expert stock market analyst tasked with creating questions for training a model. Here are some example questions for reference:\\n\\n{example_questions}\\n\\nNow, generate 1 unique, diverse, and insightful question specifically for the metric '{metric}' and the entity '{entity}'. Ensure the question provides significant analytical depth and covers the key aspects outlined. The question should be less than 20 words and must contain both the metric '{metric}' and the entity '{entity}' exactly as specified. Use the metric '{metric}' and entity '{entity}' exactly as provided in your response, this is very very important; specifically check for this condition every single time before generating a question - like if metric is Operating Cash Flow - it should be used as it is. 
Start generating the questions directly without any preamble. Provide question in xml tags i.e <question>....</question>\"\"\"\n", "                df_list.append({'Example Question': example_question,'Metric' :metric,\"Entity\": entity,'Prompt': prompt})\n", "\n", "\n", "# Create a DataFrame from the list\n", "full_df = pd.DataFrame(df_list).sample(n=25000)\n", "full_df\n", "\n", "prompts=[]\n", "all_questions=full_df['Prompt'].values.tolist()\n", "\n", "for question in all_questions:\n", "    prompts.append(question)\n", "\n", "sampling_params = SamplingParams(\n", "    temperature=0.7,\n", "    max_tokens=100\n", "    )\n", "\n", "outputs = vllm_model.generate(prompts, sampling_params)\n", "\n", "llm_output=[]\n", "for output in outputs:\n", "    try:\n", "        generated_text = output.outputs[0].text\n", "        match = re.search(r'<question>(.*?)</question>', generated_text, re.DOTALL)\n", "        question_text = match.group(1).strip() # Strips any leading/trailing whitespace\n", "\n", "\n", "        llm_output.append(question_text)\n", "    except:\n", "        llm_output.append('')\n", "\n", "\n", "full_df['Response'] =llm_output\n", "display(full_df)\n", "full_df=full_df[full_df['Response']!='']\n", "\n", "# Define the directory path and file name\n", "directory = '/home/ady/Stockbuzz.ai_Data/RouterNER/'\n", "file_path = os.path.join(directory, 'Ady_User_Database_NER_Entities_Metrics_2.csv')\n", "\n", "# Check if the directory exists, and create it if not\n", "if not os.path.exists(directory):\n", "    os.makedirs(directory)\n", "\n", "# Now save the DataFrame to the CSV file\n", "full_df.to_csv(file_path, index=False)\n", "\n", "print(f\"Synthetic questions dataset created and saved to {file_path}.\")\n", "# full_df.to_csv('/home/ady/Stocks_Guidance/StockBuzz_v2_June_24/StockBuzz_v2_June_24/AgentsTest/NER/Ady_User_Database_NER_Entities_Metrics_2.csv', index=False)# print(\"Synthetic questions dataset created and saved to 'Ady_Database4.csv'.\")\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Model" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Size of train dataset: 19891\n", "Size of validation dataset: 4975\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", "  warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Training Label Counts:\n", "Counter({'O': 322182, 'I-METRIC': 31143, 'B-ENTITY': 19886, 'B-METRIC': 16255})\n", "\n", "Validation Label Counts:\n", "Counter({'O': 80848, 'I-METRIC': 7635, 'B-ENTITY': 4970, 'B-METRIC': 4037})\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Map: 100%|██████████| 19891/19891 [00:01<00:00, 13101.13 examples/s]\n", "Map: 100%|██████████| 4975/4975 [00:00<00:00, 7058.05 examples/s] \n", "/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n", "  warnings.warn(\n", "torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED. 
In order to use Torch DDP, launch your script with `python -m torch.distributed.launch\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [ 350/1550 09:57 < 34:19, 0.58 it/s, Epoch 2/10]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation LossOverall PrecisionOverall RecallOverall F1Overall AccuracyEntity PrecisionEntity RecallEntity F1Entity AccuracyMetric PrecisionMetric RecallMetric F1Metric Accuracy
101.7677001.8369320.2945790.1240760.1746070.7730010.3480450.2017760.2554540.0000000.0074850.0009310.0016570.005126
201.6596001.6805010.2933200.1242190.1745270.7730390.3483080.2020100.2557120.0000000.0072360.0009310.0016500.006003
301.5145001.4298450.2841660.1242190.1728700.7734520.3481680.2020100.2556750.0000000.0060100.0009310.0016130.013421
401.2871001.1524680.2197710.1265890.1606450.7992280.3478790.2021260.2556900.0002340.0152260.0068930.0094900.217846
501.0478000.8885660.2511840.1523660.1896760.8100150.3571860.2093700.2639950.0144880.1231170.0624070.0828290.299521
600.7812000.6001870.7098950.6037190.6525160.8847830.9592850.9221870.9403710.8387660.1791770.0964980.1254390.358333
700.5419000.3645090.7648580.6801180.7200030.9067690.9933250.9910040.9921630.9887840.3196770.1846130.2340580.431712
800.3522000.2449810.8277780.8327710.8302670.9430340.9952040.9940410.9946220.9936910.6895790.5830850.6318760.707830
900.2465000.1534160.8540470.8772890.8655120.9628990.9987140.9981310.9984220.9980140.7519210.6928090.7211560.829770
1000.1680000.1037610.9012170.9360950.9183250.9761520.9991820.9990650.9991240.9989480.8918550.8464980.8685850.920213
1100.1155000.0644400.9197060.9614420.9401110.9851920.9998830.9998830.9998830.9997660.9355210.9135620.9244110.960478
1200.0736000.0360660.9520670.9755150.9636490.9916131.0000001.0000001.0000000.9998830.9558690.9482120.9520250.976799
1300.0514000.0263810.9654010.9857110.9754500.9937431.0000001.0000001.0000000.9998830.9805130.9748510.9776740.987860
1400.0402000.0223010.9720170.9926760.9822380.9951151.0000001.0000001.0000000.9998830.9928960.9893820.9911360.994537
1500.0313000.0166310.9771120.9931790.9850800.9960851.0000001.0000001.0000000.9998830.9923310.9882640.9902930.993997
1600.0259000.0151020.9798620.9956920.9877130.9966221.0000001.0000001.0000000.9998830.9973850.9947840.9960830.997167
1700.0215000.0101090.9903610.9959790.9931620.9981671.0000000.9998830.9999420.9997660.9962670.9942250.9952450.996965
1800.0154000.0068900.9950550.9969120.9959830.9989351.0000000.9998830.9999420.9997660.9964530.9944110.9954310.996965
1900.0130000.0070910.9945570.9972000.9958770.9989351.0000000.9998830.9999420.9997660.9994400.9973920.9984150.998516
2000.0096000.0055770.9965590.9981330.9973450.9992511.0000000.9998830.9999420.9997660.9990670.9972060.9981350.998584
2100.0094000.0046250.9972030.9984200.9978110.9993571.0000000.9998830.9999420.9997660.9981340.9966470.9973900.998314
2200.0095000.0050230.9970610.9987790.9979200.9993191.0000000.9998830.9999420.9997660.9992540.9977650.9985090.998921
2300.0065000.0037570.9977040.9984200.9980620.9994631.0000000.9998830.9999420.9997660.9981340.9966470.9973900.998381
2400.0051000.0039360.9977770.9991380.9984570.9994531.0000001.0000001.0000000.9998830.9998130.9985100.9991610.999123
2500.0056000.0031050.9984930.9987790.9986360.9995681.0000000.9998830.9999420.9997660.9994400.9975780.9985080.998651
2600.0057000.0031940.9981350.9991380.9986360.9995111.0000000.9998830.9999420.9997660.9998130.9985100.9991610.999123
2700.0045000.0028460.9984930.9992100.9988520.9996161.0000000.9998830.9999420.9997660.9998140.9986960.9992540.999258
2800.0050000.0026170.9985650.9992100.9988870.9996551.0000001.0000001.0000000.9998830.9994410.9985100.9989750.999258
2900.0034000.0025720.9982780.9991380.9987080.9996261.0000001.0000001.0000000.9998830.9992540.9985100.9988820.999326
3000.0051000.0023820.9982780.9992100.9987440.9996641.0000001.0000001.0000000.9998830.9990680.9986960.9988820.999393
3100.0056000.0025590.9981350.9993540.9987440.9996551.0000001.0000001.0000000.9998830.9990690.9990690.9990690.999528
3200.0050000.0020000.9989230.9991380.9990310.9996741.0000000.9998830.9999420.9997660.9994410.9983230.9988820.998988
3300.0036000.0019960.9990670.9991380.9991020.9996641.0000000.9998830.9999420.9997660.9996270.9981370.9988810.998853
3400.0043000.0022590.9983500.9990670.9987080.9996351.0000001.0000001.0000000.9998830.9990680.9981370.9986020.999056
3500.0029000.0020710.9989230.9992100.9990670.9996741.0000001.0000001.0000000.9998830.9996270.9983230.9989750.998988

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n", "\n", "import re\n", "import numpy as np\n", "import torch\n", "import pandas as pd\n", "import time\n", "from datasets import Dataset, load_metric\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification, TrainerCallback, EarlyStoppingCallback\n", "from transformers.trainer_utils import IntervalStrategy\n", "import matplotlib.pyplot as plt\n", "from collections import Counter\n", "import torch.nn as nn\n", "\n", "pd.set_option('display.width', 1000)\n", "\n", "# Define the label mapping\n", "labels = [\"O\", \"B-ENTITY\", \"I-ENTITY\", \"B-METRIC\", \"I-METRIC\"]\n", "label2id = {label: i for i, label in enumerate(labels)}\n", "id2label = {i: label for i, label in enumerate(labels)}\n", "\n", "# Load the dataset\n", "df_input_consolidated = pd.read_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Ady_User_Database_NER_Entities_Metrics_2.csv')\n", "\n", "df_input_consolidated = df_input_consolidated[df_input_consolidated['Response'] != 'Error: ']\n", "df_input_consolidated['Response'] = df_input_consolidated['Response'].str.lower()\n", "df_input_consolidated['Metric'] = df_input_consolidated['Metric'].str.lower()\n", "df_input_consolidated['Entity'] = df_input_consolidated['Entity'].str.lower()\n", "\n", "train_set = int(0.8 * len(df_input_consolidated))\n", "df_train = df_input_consolidated[:train_set][['Response', 'Metric', 'Entity']]\n", "df_val = df_input_consolidated[train_set:][['Response', 'Metric', 'Entity']]\n", "\n", "# Function to tokenize sentences and assign NER tags\n", "def tag_entities_and_metrics(sentence, metric, entity):\n", " words = re.findall(r\"\\w+|[.,!?;'-]|\\b's\\b\", sentence)\n", " tags = ['O'] * len(words)\n", "\n", " # Tagging the metric\n", " metric_tokens = metric.split()\n", " metric_len = len(metric_tokens)\n", " for i in range(len(words) - metric_len + 1):\n", " if words[i:i + metric_len] == metric_tokens:\n", " tags[i] = 'B-METRIC'\n", " for j in range(1, metric_len):\n", " tags[i + j] = 'I-METRIC'\n", "\n", " # Tagging the entity\n", " entity_tokens = entity.split()\n", " entity_len = len(entity_tokens)\n", " for i in range(len(words) - entity_len + 1):\n", " if words[i:i + entity_len] == entity_tokens:\n", " tags[i] = 'B-ENTITY'\n", " for j in range(1, entity_len):\n", " tags[i + j] = 'I-ENTITY'\n", "\n", " return words, tags\n", "\n", "# Function to process dataframe and prepare datasets\n", "def prepare_dataset(df):\n", " df['tokens_and_tags'] = df.apply(lambda row: tag_entities_and_metrics(row['Response'], row['Metric'], row['Entity']), axis=1)\n", " df['tokens'] = df['tokens_and_tags'].apply(lambda x: x[0])\n", " df['ner_tags'] = df['tokens_and_tags'].apply(lambda x: x[1])\n", " df = df.drop(columns=['tokens_and_tags'])\n", " df = df[df['ner_tags'].apply(lambda tags: not all(tag == 'O' for tag in tags))]\n", " df['ner_tags'] = df['ner_tags'].apply(lambda tags: [label2id[tag] for tag in tags])\n", " return Dataset.from_pandas(df)\n", "\n", "# Prepare datasets\n", "train_dataset = prepare_dataset(df_train)\n", "val_dataset = prepare_dataset(df_val)\n", "\n", "# Print the size of the train and validation datasets\n", "print(f\"Size of train dataset: {len(train_dataset)}\")\n", "print(f\"Size of validation dataset: {len(val_dataset)}\")\n", "\n", "# Count the occurrences of each label in the training and validation 
sets\n", "def count_labels(dataset):\n", " label_counts = Counter()\n", " for example in dataset:\n", " labels = example['ner_tags']\n", " for label in labels:\n", " label_counts[id2label[label]] += 1\n", " return label_counts\n", "\n", "train_label_counts = count_labels(train_dataset)\n", "val_label_counts = count_labels(val_dataset)\n", "\n", "print(\"Training Label Counts:\")\n", "print(train_label_counts)\n", "\n", "print(\"\\nValidation Label Counts:\")\n", "print(val_label_counts)\n", "\n", "# Tokenization and Alignment Functions\n", "checkpoint = 'Jean-Baptiste/roberta-large-ner-english'\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "def align_labels(labels, word_ids):\n", " aligned_labels = []\n", " prev_word_id = None\n", " for word_id in word_ids:\n", " if word_id is None:\n", " aligned_labels.append(-100)\n", " elif word_id != prev_word_id:\n", " aligned_labels.append(labels[word_id])\n", " else:\n", " aligned_labels.append(labels[word_id] if labels[word_id] != 0 else -100)\n", " prev_word_id = word_id\n", " return aligned_labels\n", "\n", "def tokenize_and_align_labels(examples):\n", " tokenized_inputs = tokenizer(examples['tokens'], truncation=True, max_length=32, is_split_into_words=True)\n", " all_labels = examples['ner_tags']\n", " new_labels = [align_labels(labels, tokenized_inputs.word_ids(i)) for i, labels in enumerate(all_labels)]\n", " tokenized_inputs['labels'] = new_labels\n", " return tokenized_inputs\n", "\n", "tokenized_train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n", "tokenized_val_dataset = val_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n", "\n", "# Define the data collator\n", "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)\n", "\n", "# Load metric\n", "metric = load_metric('seqeval',trust_remote_code=True)\n", "\n", "# Custom loss function to give higher weight to metrics\n", "class WeightedLoss(nn.CrossEntropyLoss):\n", " def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'):\n", " super(WeightedLoss, self).__init__(weight, size_average, ignore_index, reduce, reduction)\n", "\n", " def forward(self, input, target):\n", " if self.weight is not None:\n", " assert self.weight.dim() == 1\n", " input = input * self.weight.unsqueeze(0).expand_as(input)\n", " return super(WeightedLoss, self).forward(input, target)\n", "\n", "# Custom Trainer to use the weighted loss\n", "class CustomTrainer(Trainer):\n", " def compute_loss(self, model, inputs, return_outputs=False):\n", " labels = inputs.get(\"labels\")\n", " outputs = model(**inputs)\n", " logits = outputs.get(\"logits\")\n", " # Define the weights, giving higher weight to METRIC labels\n", " class_weights = torch.tensor([0.4, 0.5, 0.5, 1.0, 1.0], device=logits.device)\n", " loss_fct = WeightedLoss(weight=class_weights)\n", " loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n", " return (loss, outputs) if return_outputs else loss\n", "\n", "# Function to compute metrics\n", "def compute_metrics(p):\n", " predictions, labels = p\n", " predictions = np.argmax(predictions, axis=2)\n", " true_labels = [[id2label[label] for label in label_set if label != -100] for label_set in labels]\n", " true_predictions = [[id2label[pred] for pred, label in zip(pred_set, label_set) if label != -100] for pred_set, label_set in zip(predictions, labels)]\n", " results = 
metric.compute(predictions=true_predictions, references=true_labels, zero_division=0)\n", "\n", " # Calculate metrics for ENTITY\n", " entity_results = metric.compute(\n", " predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for pred_set, label_set in zip(true_predictions, true_labels)],\n", " references=[[true for true in label_set if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for label_set in true_labels],\n", " zero_division=0\n", " )\n", "\n", " # Calculate metrics for METRIC\n", " metric_results = metric.compute(\n", " predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for pred_set, label_set in zip(true_predictions, true_labels)],\n", " references=[[true for true in label_set if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for label_set in true_labels],\n", " zero_division=0\n", " )\n", "\n", " return {\n", " \"overall_precision\": results[\"overall_precision\"],\n", " \"overall_recall\": results[\"overall_recall\"],\n", " \"overall_f1\": results[\"overall_f1\"],\n", " \"overall_accuracy\": results[\"overall_accuracy\"],\n", " \"entity_precision\": entity_results[\"overall_precision\"],\n", " \"entity_recall\": entity_results[\"overall_recall\"],\n", " \"entity_f1\": entity_results[\"overall_f1\"],\n", " \"entity_accuracy\": entity_results[\"overall_accuracy\"],\n", " \"metric_precision\": metric_results[\"overall_precision\"],\n", " \"metric_recall\": metric_results[\"overall_recall\"],\n", " \"metric_f1\": metric_results[\"overall_f1\"],\n", " \"metric_accuracy\": metric_results[\"overall_accuracy\"],\n", " }\n", "\n", "# Load the model\n", "model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels=len(labels), id2label=id2label, label2id=label2id,ignore_mismatched_sizes=True)\n", "\n", "# Custom callback to capture loss history\n", "class LossHistoryCallback(TrainerCallback):\n", " def __init__(self):\n", " self.losses = []\n", " self.eval_losses = []\n", "\n", " def on_log(self, args, state, control, logs=None, **kwargs):\n", " if logs is not None:\n", " if 'loss' in logs:\n", " self.losses.append(logs['loss'])\n", " if 'eval_loss' in logs:\n", " self.eval_losses.append(logs['eval_loss'])\n", "\n", "loss_history_callback = LossHistoryCallback()\n", "\n", "# Training arguments\n", "training_args = TrainingArguments(\n", " output_dir='./results',\n", " evaluation_strategy=IntervalStrategy.STEPS,\n", " save_strategy=\"steps\",\n", " logging_strategy=\"steps\",\n", " learning_rate=1e-5,\n", " num_train_epochs=10,\n", " per_device_train_batch_size=32,\n", " per_device_eval_batch_size=32, \n", " gradient_accumulation_steps=4, # Accumulate gradients over 4 steps\n", " warmup_steps=500,\n", " weight_decay=0.01,\n", " logging_dir='./logs',\n", " logging_steps=10,\n", " eval_steps=10,\n", " save_steps=10,\n", " load_best_model_at_end=True,\n", " save_total_limit=1,\n", ")\n", "\n", "# Custom Trainer instance\n", "trainer = CustomTrainer(\n", " model=model,\n", " args=training_args,\n", " train_dataset=tokenized_train_dataset,\n", " eval_dataset=tokenized_val_dataset,\n", " tokenizer=tokenizer,\n", " data_collator=data_collator,\n", " compute_metrics=compute_metrics,\n", " callbacks=[EarlyStoppingCallback(early_stopping_patience=2), loss_history_callback]\n", ")\n", "\n", "# Train the model\n", "trainer.train()\n", "\n", "# Save the model\n", 
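"# (Optional, illustrative sketch: before saving, plot the training/validation loss curves\n", "#  collected by loss_history_callback above; matplotlib is already imported as plt, and\n", "#  logging and evaluation both run every 10 steps, so the two histories share a step axis.)\n", "if loss_history_callback.losses:\n", "    steps = [(i + 1) * training_args.logging_steps for i in range(len(loss_history_callback.losses))]\n", "    plt.plot(steps, loss_history_callback.losses, label='training loss')\n", "    plt.plot(steps[:len(loss_history_callback.eval_losses)], loss_history_callback.eval_losses, label='validation loss')\n", "    plt.xlabel('step')\n", "    plt.ylabel('loss')\n", "    plt.legend()\n", "    plt.show()\n", "\n",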
"trainer.save_model('/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model')\n", "\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NER Inference" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Entities and metrics and their average scores for text: 'What is the EPS for Apple from 2023, compare it against Intel?'\n", "Predicted Entities: {'apple': [0.9999647]}\n", "Predicted Metrics: {'eps': [0.99446386]}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What are the REVENUES of Amazon?'\n", "Predicted Entities: {'amazon': [0.9999809]}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the annual income for Microsoft vs NVDA?'\n", "Predicted Entities: {'microsoft': [0.9999985]}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the depreciation for ShaktiPumps in 2023?'\n", "Predicted Entities: {}\n", "Predicted Metrics: {}\n", "\n", "==================================================\n", "\n", "Entities and metrics and their average scores for text: 'What is the ttm net profits for Amazon in 2023?'\n", "Predicted Entities: {'amazon': [0.9999693]}\n", "Predicted Metrics: {'tm': [0.5082908]}\n", "\n", "==================================================\n", "\n" ] } ], "source": [ "import time\n", "import numpy as np\n", "from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n", "\n", "checkpoint = \"ner_model\"\n", "tokenizer_ner = AutoTokenizer.from_pretrained(checkpoint)\n", "model_ner = AutoModelForTokenClassification.from_pretrained(checkpoint)\n", "\n", "# Initialize the NER pipeline without additional parameters\n", "ner_model = pipeline('ner', model=model_ner, tokenizer=tokenizer_ner, aggregation_strategy='simple')\n", "\n", "# Enhanced post-processing\n", "def extract_entities_with_scores(example_text):\n", " # Tokenize the text with offset mappings\n", " encoding = tokenizer_ner(example_text, return_offsets_mapping=True, truncation=True, max_length=32)\n", " offset_mapping = encoding['offset_mapping']\n", " tokens = tokenizer_ner.convert_ids_to_tokens(encoding['input_ids'])\n", " \n", " # Use the pipeline directly\n", " ner_results = ner_model(example_text)\n", " # print(ner_results)\n", "\n", " entities = {}\n", " metrics = {}\n", " current_entity = None\n", "\n", " for result in ner_results:\n", " entity_type = result['entity_group']\n", " start, end = result['start'], result['end']\n", " score = result['score']\n", " word = example_text[start:end].strip()\n", " # print(word)\n", "\n", " # Check if current entity needs to be continued or started fresh\n", " if current_entity and current_entity['entity_group'] == entity_type and (current_entity['end'] == start or current_entity['end'] + 1 == start):\n", " # Continue the current entity\n", " current_entity['word'] += ' ' + word if current_entity['end'] + 1 == start else word\n", " current_entity['scores'].append(score)\n", " current_entity['end'] = end\n", " else:\n", " # Save the previous 
entity\n", " if current_entity:\n", " entity_name = current_entity['word']\n", " average_score = np.mean(current_entity['scores'])\n", " if current_entity['entity_group'] == 'ENTITY':\n", " if entity_name not in entities:\n", " entities[entity_name] = []\n", " entities[entity_name].append(average_score)\n", " elif current_entity['entity_group'] == 'METRIC':\n", " if entity_name not in metrics:\n", " metrics[entity_name] = []\n", " metrics[entity_name].append(average_score)\n", "\n", " # Start a new entity\n", " current_entity = {'entity_group': entity_type, 'word': word, 'scores': [score], 'start': start, 'end': end}\n", "\n", " # Append the last entity or metric if exists\n", " if current_entity:\n", " entity_name = current_entity['word']\n", " average_score = np.mean(current_entity['scores'])\n", " if current_entity['entity_group'] == 'ENTITY':\n", " if entity_name not in entities:\n", " entities[entity_name] = []\n", " entities[entity_name].append(average_score)\n", " elif current_entity['entity_group'] == 'METRIC':\n", " if entity_name not in metrics:\n", " metrics[entity_name] = []\n", " metrics[entity_name].append(average_score)\n", "\n", " return entities, metrics\n", "\n", "# Test the model with example texts\n", "example_texts = [\n", " \"What is the EPS for Apple from 2023, compare it against Intel?\",\n", " \"What are the REVENUES of Amazon?\",\n", " \"What is the annual income for Microsoft vs NVDA?\",\n", " \"What is the depreciation for ShaktiPumps in 2023?\",\n", " \"What is the ttm net profits for Amazon in 2023?\"\n", "]\n", "\n", "## Test set \n", "# example_texts = df_val.sample(n=5)['Response'].values.tolist()\n", "\n", "\n", "for text in example_texts:\n", " entities, metrics = extract_entities_with_scores(text.lower())\n", "\n", " print(f\"Entities and metrics and their average scores for text: '{text}'\")\n", " print(f\"Predicted Entities: {entities}\")\n", " print(f\"Predicted Metrics: {metrics}\")\n", " print(\"\\n\" + \"=\"*50 + \"\\n\")\n" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Metric\n", "pe ratio ttm 1156\n", "return on assets ttm 635\n", "dividend yield 615\n", "piotroski score 612\n", "return on capital employed 611\n", "price to fair-value ttm 610\n", "net-profit margin ttm 605\n", "debt-equity ratio ttm 603\n", "debt ratio ttm 598\n", "operating profit margin ttm 597\n", "total-debt growth 595\n", "retained earnings 594\n", "altman z score 591\n", "pe ratio 589\n", "ebit 589\n", "dividend yield ttm 589\n", "return on capital employed ttm 588\n", "netdebt growth 584\n", "growth in net-income-ratio 582\n", "dividend yield percentage ttm 581\n", "gross profit margin ttm 578\n", "interest coverage ttm 578\n", "peg ratio ttm 577\n", "pe to growth ratio 576\n", "dividend payout ratio 575\n", "ebitda growth 574\n", "pef ratio ttm 572\n", "working capital 571\n", "debt-equity ratio 571\n", "revenue growth 571\n", "eps growth 571\n", "enterprise value multiple 566\n", "current ratio ttm 562\n", "total liabilities 562\n", "dividend per share ttm 560\n", "interest coverage 558\n", "net-income growth 557\n", "price to sales ratio ttm 555\n", "quick ratio ttm 551\n", "return on equity ttm 550\n", "debt ratio 546\n", "total assets 539\n", "Name: count, dtype: int64" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_input_consolidated['Metric'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Send to HF" ] }, { "cell_type": 
"markdown", "metadata": {}, "source": [ "Router" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/\"\n", "repo_id = \"Aditiyadav/Router\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/\"\n", "repo_id = \"Aditiyadav/SubRouter\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "## Send to Huggingface\n", "\n", "from huggingface_hub import login\n", "login(token=\"\")\n", "\n", "from huggingface_hub import upload_folder\n", "\n", "# Define the local directory and repo details\n", "local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model/\"\n", "repo_id = \"Aditiyadav/NER\" # Your Hugging Face repo\n", "\n", "# Upload the folder directly to Hugging Face Hub\n", "upload_folder(\n", " repo_id=repo_id,\n", " folder_path=local_model_path, # The local directory containing the model\n", " commit_message=\"Uploading model from local directory\"\n", ")\n" ] } ], "metadata": { "kernelspec": { "display_name": "gpu_jan12", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }