"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [310/310 05:22, Epoch 5/5]\n",
"
\n",
" \n",
" \n",
" \n",
" | Epoch | \n",
" Training Loss | \n",
" Validation Loss | \n",
" Accuracy | \n",
" Precision | \n",
" Recall | \n",
" F1 | \n",
"
\n",
" \n",
" \n",
" \n",
" | 1 | \n",
" 0.948000 | \n",
" 0.775363 | \n",
" 0.886082 | \n",
" 0.893503 | \n",
" 0.886082 | \n",
" 0.887385 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0.235800 | \n",
" 0.169679 | \n",
" 0.959794 | \n",
" 0.960594 | \n",
" 0.959794 | \n",
" 0.959883 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0.152600 | \n",
" 0.123418 | \n",
" 0.964948 | \n",
" 0.965072 | \n",
" 0.964948 | \n",
" 0.964948 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0.086800 | \n",
" 0.145899 | \n",
" 0.963918 | \n",
" 0.964666 | \n",
" 0.963918 | \n",
" 0.963899 | \n",
"
\n",
" \n",
" | 5 | \n",
" 0.076000 | \n",
" 0.161085 | \n",
" 0.962371 | \n",
" 0.963215 | \n",
" 0.962371 | \n",
" 0.962410 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"('SubRouter_Models/tokenizer_config.json',\n",
" 'SubRouter_Models/special_tokens_map.json',\n",
" 'SubRouter_Models/vocab.txt',\n",
" 'SubRouter_Models/added_tokens.json')"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import accuracy_score, precision_recall_fscore_support\n",
"from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EarlyStoppingCallback\n",
"import gc\n",
"import os\n",
"import json\n",
"\n",
"# Restrict this process to physical GPU 1; torch then sees it as cuda:0.\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
"\n",
"# Use the (single) visible GPU if available, otherwise fall back to CPU.\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"# Clear GPU memory left over from earlier cells.\n",
"torch.cuda.empty_cache()\n",
"gc.collect()\n",
"\n",
"# Load the generated CSV and subsample for faster training.\n",
"# random_state is fixed so the sample (and thus the split) is reproducible.\n",
"df = pd.read_csv(\"/home/ady/Stockbuzz.ai_Data/RouterNER/Combined_Ady_Database.csv\")\n",
"df = df.sample(n=min(10000, len(df)), random_state=42)\n",
"\n",
"df.rename(columns={'Category': 'category', 'Response': 'Questions'}, inplace=True)\n",
"\n",
"# Create the category-to-label mapping.\n",
"df['label'] = df['category'].astype('category').cat.codes\n",
"label_to_category = dict(enumerate(df['category'].astype('category').cat.categories))\n",
"category_to_label = {v: k for k, v in label_to_category.items()}\n",
"\n",
"# Save both mappings so the inference cell can decode predicted label ids.\n",
"os.makedirs('SubRouter_Models', exist_ok=True)\n",
"with open('SubRouter_Models/label_to_category.json', 'w') as f:\n",
"    json.dump(label_to_category, f)\n",
"with open('SubRouter_Models/category_to_label.json', 'w') as f:\n",
"    json.dump(category_to_label, f)\n",
"\n",
"# Split into train/validation on unique 'Questions' so the same question text\n",
"# never appears in both sets. Validation takes *all* remaining questions —\n",
"# the previous int(0.2 * n) slice could silently drop one to integer rounding.\n",
"unique_questions = df['Questions'].unique().tolist()\n",
"train_size = int(0.8 * len(unique_questions))\n",
"\n",
"train_questions = unique_questions[:train_size]\n",
"val_questions = unique_questions[train_size:]\n",
"\n",
"train_df = df[df['Questions'].isin(train_questions)]\n",
"val_df = df[df['Questions'].isin(val_questions)]\n",
"\n",
"# Initialize the BERT tokenizer and a classification head sized to the label set.\n",
"checkpoint = \"google-bert/bert-large-uncased\"\n",
"tokenizer_subrouter = BertTokenizer.from_pretrained(checkpoint)\n",
"model_subrouter = BertForSequenceClassification.from_pretrained(checkpoint, num_labels=len(df['category'].unique()))\n",
"model_subrouter.to(device)  # Move the model to the selected device\n",
"\n",
"def tokenize_data(data):\n",
"    \"\"\"Tokenize the 'Questions' column into padded/truncated pt tensors (max_length=64).\"\"\"\n",
"    return tokenizer_subrouter(data['Questions'].astype(str).tolist(), padding=True, truncation=True, max_length=64, return_tensors='pt')\n",
"\n",
"train_tokens = tokenize_data(train_df)\n",
"val_tokens = tokenize_data(val_df)\n",
"\n",
"train_labels = torch.tensor(train_df['label'].values, dtype=torch.long)\n",
"val_labels = torch.tensor(val_df['label'].values, dtype=torch.long)\n",
"\n",
"class ArticleDataset(torch.utils.data.Dataset):\n",
"    \"\"\"Wraps tokenized encodings plus labels for consumption by the HF Trainer.\"\"\"\n",
"\n",
"    def __init__(self, encodings, labels):\n",
"        self.encodings = encodings\n",
"        self.labels = labels\n",
"\n",
"    def __getitem__(self, idx):\n",
"        item = {key: val[idx] for key, val in self.encodings.items()}\n",
"        item['labels'] = self.labels[idx]\n",
"        return item\n",
"\n",
"    def __len__(self):\n",
"        return len(self.labels)\n",
"\n",
"train_dataset = ArticleDataset(train_tokens, train_labels)\n",
"val_dataset = ArticleDataset(val_tokens, val_labels)\n",
"\n",
"def compute_metrics(pred):\n",
"    \"\"\"Return accuracy and weighted precision/recall/F1 for Trainer evaluation.\"\"\"\n",
"    labels = pred.label_ids\n",
"    preds = pred.predictions.argmax(-1)\n",
"    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n",
"    acc = accuracy_score(labels, preds)\n",
"    return {\n",
"        'accuracy': acc,\n",
"        'precision': precision,\n",
"        'recall': recall,\n",
"        'f1': f1,\n",
"    }\n",
"\n",
"# Training arguments: evaluate + checkpoint every epoch, keep only the best model.\n",
"training_args = TrainingArguments(\n",
"    output_dir='./results',\n",
"    num_train_epochs=5,\n",
"    per_device_train_batch_size=128,\n",
"    per_device_eval_batch_size=128,\n",
"    warmup_steps=500,\n",
"    weight_decay=0.01,\n",
"    logging_dir='./logs',\n",
"    logging_steps=10,\n",
"    evaluation_strategy=\"epoch\",\n",
"    save_strategy=\"epoch\",\n",
"    load_best_model_at_end=True,\n",
"    save_total_limit=1,\n",
"    no_cuda=False,\n",
"    dataloader_pin_memory=False,\n",
")\n",
"\n",
"trainer = Trainer(\n",
"    model=model_subrouter,\n",
"    args=training_args,\n",
"    train_dataset=train_dataset,\n",
"    eval_dataset=val_dataset,\n",
"    compute_metrics=compute_metrics,\n",
"    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],\n",
")\n",
"\n",
"trainer.train()\n",
"\n",
"# Save the fine-tuned model and tokenizer.\n",
"# NOTE(review): this saves to an absolute path, but the inference cell below\n",
"# loads from the relative './SubRouter_Models' — confirm both point at the\n",
"# same directory, otherwise inference will load stale weights.\n",
"model_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n",
"tokenizer_subrouter.save_pretrained('/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/')\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"SubRouter Inference"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Question: Compare depreciation ttm of amazon and intol\n",
"Predicted Category: multiple companies analysis\n",
"\n",
"Question: What is the current market trend for technology stocks?\n",
"Predicted Category: industry analysis\n",
"\n",
"Question: How did shakti pumps perform in the last quarter?\n",
"Predicted Category: single company analysis\n",
"\n",
"Question: Compare Amazon vs Apple in terms of revenues\n",
"Predicted Category: multiple companies analysis\n",
"\n",
"Question: What is the reasonable price of Nvidia?\n",
"Predicted Category: fair valuation\n",
"\n",
"Question: Compare Apple vs Micrsoft\n",
"Predicted Category: multiple companies analysis\n",
"\n"
]
}
],
"source": [
"import torch\n",
"from transformers import BertTokenizer, BertForSequenceClassification\n",
"import json\n",
"\n",
"# Load the model and tokenizer\n",
"device = torch.device(\"cuda\")\n",
"model_subrouter = BertForSequenceClassification.from_pretrained('./SubRouter_Models')\n",
"tokenizer_subrouter = BertTokenizer.from_pretrained('./SubRouter_Models')\n",
"model_subrouter.to(device) # Move the model to the specified device\n",
"\n",
"# Load the category-to-label mapping\n",
"with open('SubRouter_Models/label_to_category.json', 'r') as f:\n",
" label_to_category_subrouter = json.load(f)\n",
"\n",
"def predict_subrouter(text):\n",
" model_subrouter.eval()\n",
" inputs = tokenizer_subrouter(text, return_tensors='pt', padding=True, truncation=True, max_length=64).to(device)\n",
" with torch.no_grad():\n",
" outputs = model_subrouter(**inputs)\n",
" logits = outputs.logits\n",
" predicted_class_id = logits.argmax().item()\n",
" return label_to_category_subrouter[str(predicted_class_id)]\n",
"\n",
"# Example usage:\n",
"questions = [\"Compare depreciation ttm of amazon and intol\",\n",
" \"What is the current market trend for technology stocks?\",\n",
" \"How did shakti pumps perform in the last quarter?\",\n",
" \"Compare Amazon vs Apple in terms of revenues\",\n",
" \"What is the reasonable price of Nvidia?\",\n",
" \"Compare Apple vs Micrsoft\"]\n",
"for text in questions:\n",
" category = predict_subrouter(text)\n",
" print(f\"Question: {text}\\nPredicted Category: {category}\\n\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NER"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
"Token is valid (permission: fineGrained).\n",
"Your token has been saved to /home/ady/.cache/huggingface/token\n",
"Login successful\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-09-15 21:07:20,332\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 09-15 21:07:20 llm_engine.py:223] Initializing an LLM engine (v0.6.1.post2) with config: model='meta-llama/Meta-Llama-3.1-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3.1-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, rope_scaling=None, rope_theta=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=26000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=meta-llama/Meta-Llama-3.1-8B-Instruct, use_v2_block_manager=False, num_scheduler_steps=1, enable_prefix_caching=False, use_async_output_proc=True)\n",
"INFO 09-15 21:07:21 model_runner.py:997] Starting to load model meta-llama/Meta-Llama-3.1-8B-Instruct...\n",
"INFO 09-15 21:07:22 weight_utils.py:242] Using model weights format ['*.safetensors']\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00, ?it/s]\n",
"Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:00<00:00, 6.39it/s]\n",
"Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:00<00:00, 2.18it/s]\n",
"Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:01<00:00, 1.74it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.59it/s]\n",
"Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:02<00:00, 1.77it/s]\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO 09-15 21:07:24 model_runner.py:1008] Loading model weights took 14.9888 GB\n",
"INFO 09-15 21:07:28 gpu_executor.py:122] # GPU blocks: 13756, # CPU blocks: 2048\n",
"INFO 09-15 21:07:29 model_runner.py:1311] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.\n",
"INFO 09-15 21:07:29 model_runner.py:1315] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\n"
]
},
{
"ename": "OutOfMemoryError",
"evalue": "CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.43 GiB of which 16.69 MiB is free. Including non-PyTorch memory, this process has 47.38 GiB memory in use. Of the allocated memory 46.94 GiB is allocated by PyTorch, with 33.31 MiB allocated in private pools (e.g., CUDA Graphs), and 41.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[14], line 18\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mvllm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m LLM, SamplingParams\n\u001b[1;32m 14\u001b[0m TEXT_LLM_MODEL_MISTRAL\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmeta-llama/Meta-Llama-3.1-8B-Instruct\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m---> 18\u001b[0m vllm_model \u001b[38;5;241m=\u001b[39m \u001b[43mLLM\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mTEXT_LLM_MODEL_MISTRAL\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mtensor_parallel_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Use both GPUs\u001b[39;49;00m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mgpu_memory_utilization\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.95\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Lower memory utilization to prevent OOM\u001b[39;49;00m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_model_len\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m26000\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/entrypoints/llm.py:178\u001b[0m, in \u001b[0;36mLLM.__init__\u001b[0;34m(self, model, tokenizer, tokenizer_mode, skip_tokenizer_init, trust_remote_code, tensor_parallel_size, dtype, quantization, revision, tokenizer_revision, seed, gpu_memory_utilization, swap_space, cpu_offload_gb, enforce_eager, max_context_len_to_capture, max_seq_len_to_capture, disable_custom_all_reduce, disable_async_output_proc, **kwargs)\u001b[0m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m 155\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThere is no need to pass vision-related arguments anymore.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 156\u001b[0m engine_args \u001b[38;5;241m=\u001b[39m EngineArgs(\n\u001b[1;32m 157\u001b[0m model\u001b[38;5;241m=\u001b[39mmodel,\n\u001b[1;32m 158\u001b[0m tokenizer\u001b[38;5;241m=\u001b[39mtokenizer,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs,\n\u001b[1;32m 177\u001b[0m )\n\u001b[0;32m--> 178\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine \u001b[38;5;241m=\u001b[39m \u001b[43mLLMEngine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_engine_args\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 179\u001b[0m \u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mUsageContext\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mLLM_CLASS\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest_counter \u001b[38;5;241m=\u001b[39m Counter()\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:550\u001b[0m, in \u001b[0;36mLLMEngine.from_engine_args\u001b[0;34m(cls, engine_args, usage_context, stat_loggers)\u001b[0m\n\u001b[1;32m 548\u001b[0m executor_class \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_get_executor_cls(engine_config)\n\u001b[1;32m 549\u001b[0m \u001b[38;5;66;03m# Create the LLM engine.\u001b[39;00m\n\u001b[0;32m--> 550\u001b[0m engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 551\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mengine_config\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_dict\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 552\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutor_class\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecutor_class\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 553\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_stats\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mengine_args\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdisable_log_stats\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 554\u001b[0m \u001b[43m \u001b[49m\u001b[43musage_context\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43musage_context\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 555\u001b[0m \u001b[43m \u001b[49m\u001b[43mstat_loggers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstat_loggers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 556\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 558\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m engine\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:331\u001b[0m, in \u001b[0;36mLLMEngine.__init__\u001b[0;34m(self, model_config, cache_config, parallel_config, scheduler_config, device_config, load_config, lora_config, speculative_config, decoding_config, observability_config, prompt_adapter_config, executor_class, log_stats, usage_context, stat_loggers, input_registry)\u001b[0m\n\u001b[1;32m 317\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_executor \u001b[38;5;241m=\u001b[39m executor_class(\n\u001b[1;32m 318\u001b[0m model_config\u001b[38;5;241m=\u001b[39mmodel_config,\n\u001b[1;32m 319\u001b[0m cache_config\u001b[38;5;241m=\u001b[39mcache_config,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 327\u001b[0m observability_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config,\n\u001b[1;32m 328\u001b[0m )\n\u001b[1;32m 330\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39membedding_mode:\n\u001b[0;32m--> 331\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_initialize_kv_caches\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[38;5;66;03m# If usage stat is enabled, collect relevant info.\u001b[39;00m\n\u001b[1;32m 334\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_usage_stats_enabled():\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/engine/llm_engine.py:473\u001b[0m, in \u001b[0;36mLLMEngine._initialize_kv_caches\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_gpu_blocks \u001b[38;5;241m=\u001b[39m num_gpu_blocks\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[0;32m--> 473\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/executor/gpu_executor.py:125\u001b[0m, in \u001b[0;36mGPUExecutor.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;66;03m# NOTE: This is logged in the executor because there can be >1 worker\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;66;03m# with other executors. We could log in the engine level, but work\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;66;03m# remains to abstract away the device for non-GPU configurations.\u001b[39;00m\n\u001b[1;32m 122\u001b[0m logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m# GPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m, # CPU blocks: \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, num_gpu_blocks,\n\u001b[1;32m 123\u001b[0m num_cpu_blocks)\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize_cache\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_gpu_blocks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cpu_blocks\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:266\u001b[0m, in \u001b[0;36mWorker.initialize_cache\u001b[0;34m(self, num_gpu_blocks, num_cpu_blocks)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcache_config\u001b[38;5;241m.\u001b[39mnum_cpu_blocks \u001b[38;5;241m=\u001b[39m num_cpu_blocks\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_init_cache_engine()\n\u001b[0;32m--> 266\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_warm_up_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/worker.py:282\u001b[0m, in \u001b[0;36mWorker._warm_up_model\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_warm_up_model\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 281\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39menforce_eager:\n\u001b[0;32m--> 282\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgpu_cache\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[38;5;66;03m# Reset the seed to ensure that the random state is not affected by\u001b[39;00m\n\u001b[1;32m 284\u001b[0m \u001b[38;5;66;03m# the model initialization and profiling.\u001b[39;00m\n\u001b[1;32m 285\u001b[0m set_random_seed(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel_config\u001b[38;5;241m.\u001b[39mseed)\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator..decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1422\u001b[0m, in \u001b[0;36mGPUModelRunnerBase.capture_model\u001b[0;34m(self, kv_caches)\u001b[0m\n\u001b[1;32m 1415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhas_seqlen_agnostic:\n\u001b[1;32m 1416\u001b[0m \u001b[38;5;66;03m# Only used by Mamba-based models CUDA graph atm (Jamba)\u001b[39;00m\n\u001b[1;32m 1417\u001b[0m capture_inputs\u001b[38;5;241m.\u001b[39mupdate({\n\u001b[1;32m 1418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseqlen_agnostic_capture_inputs\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 1419\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmodel\u001b[38;5;241m.\u001b[39mget_seqlen_agnostic_capture_inputs(\n\u001b[1;32m 1420\u001b[0m batch_size)\n\u001b[1;32m 1421\u001b[0m })\n\u001b[0;32m-> 1422\u001b[0m \u001b[43mgraph_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcapture\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcapture_inputs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1423\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_memory_pool \u001b[38;5;241m=\u001b[39m graph_runner\u001b[38;5;241m.\u001b[39mgraph\u001b[38;5;241m.\u001b[39mpool()\n\u001b[1;32m 1424\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgraph_runners[virtual_engine][batch_size] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1425\u001b[0m graph_runner)\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/worker/model_runner.py:1665\u001b[0m, in \u001b[0;36mCUDAGraphRunner.capture\u001b[0;34m(self, input_ids, positions, hidden_or_intermediate_states, intermediate_inputs, kv_caches, attn_metadata, memory_pool, stream, **kwargs)\u001b[0m\n\u001b[1;32m 1660\u001b[0m \u001b[38;5;66;03m# Run the model a few times without capturing the graph.\u001b[39;00m\n\u001b[1;32m 1661\u001b[0m \u001b[38;5;66;03m# This is to make sure that the captured graph does not include the\u001b[39;00m\n\u001b[1;32m 1662\u001b[0m \u001b[38;5;66;03m# kernel launches for initial benchmarking (e.g., Triton autotune).\u001b[39;00m\n\u001b[1;32m 1663\u001b[0m \u001b[38;5;66;03m# Note one iteration is not enough for torch.jit.script\u001b[39;00m\n\u001b[1;32m 1664\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(_NUM_WARMUP_ITERS):\n\u001b[0;32m-> 1665\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1666\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1667\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1668\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1669\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1670\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_inputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1671\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1672\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1673\u001b[0m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39msynchronize()\n\u001b[1;32m 1675\u001b[0m \u001b[38;5;66;03m# Capture the graph.\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:448\u001b[0m, in \u001b[0;36mLlamaForCausalLM.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors)\u001b[0m\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 442\u001b[0m input_ids: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 446\u001b[0m intermediate_tensors: Optional[IntermediateTensors] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 447\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Union[torch\u001b[38;5;241m.\u001b[39mTensor, IntermediateTensors]:\n\u001b[0;32m--> 448\u001b[0m model_output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_ids\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model_output\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:329\u001b[0m, in \u001b[0;36mLlamaModel.forward\u001b[0;34m(self, input_ids, positions, kv_caches, attn_metadata, intermediate_tensors, inputs_embeds)\u001b[0m\n\u001b[1;32m 327\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstart_layer, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mend_layer):\n\u001b[1;32m 328\u001b[0m layer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlayers[i]\n\u001b[0;32m--> 329\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[43mlayer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 330\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 331\u001b[0m \u001b[43m \u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 332\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[43m[\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstart_layer\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 333\u001b[0m \u001b[43m \u001b[49m\u001b[43mattn_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 334\u001b[0m \u001b[43m \u001b[49m\u001b[43mresidual\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 335\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 337\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 338\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m IntermediateTensors({\n\u001b[1;32m 339\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhidden_states\u001b[39m\u001b[38;5;124m\"\u001b[39m: hidden_states,\n\u001b[1;32m 
340\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresidual\u001b[39m\u001b[38;5;124m\"\u001b[39m: residual\n\u001b[1;32m 341\u001b[0m })\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:261\u001b[0m, in \u001b[0;36mLlamaDecoderLayer.forward\u001b[0;34m(self, positions, hidden_states, kv_cache, attn_metadata, residual)\u001b[0m\n\u001b[1;32m 258\u001b[0m \u001b[38;5;66;03m# Fully Connected\u001b[39;00m\n\u001b[1;32m 259\u001b[0m hidden_states, residual \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpost_attention_layernorm(\n\u001b[1;32m 260\u001b[0m hidden_states, residual)\n\u001b[0;32m--> 261\u001b[0m hidden_states \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmlp\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhidden_states\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m hidden_states, residual\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/models/llama.py:87\u001b[0m, in \u001b[0;36mLlamaMLP.forward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, x):\n\u001b[0;32m---> 87\u001b[0m gate_up, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgate_up_proj\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 88\u001b[0m x \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mact_fn(gate_up)\n\u001b[1;32m 89\u001b[0m x, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdown_proj(x)\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1553\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1552\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1553\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/torch/nn/modules/module.py:1562\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1557\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1558\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1559\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1560\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1561\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1562\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1564\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 1565\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:367\u001b[0m, in \u001b[0;36mColumnParallelLinear.forward\u001b[0;34m(self, input_)\u001b[0m\n\u001b[1;32m 365\u001b[0m \u001b[38;5;66;03m# Matrix multiply.\u001b[39;00m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mquant_method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m--> 367\u001b[0m output_parallel \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquant_method\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgather_output:\n\u001b[1;32m 369\u001b[0m \u001b[38;5;66;03m# All-gather across the partitions.\u001b[39;00m\n\u001b[1;32m 370\u001b[0m output \u001b[38;5;241m=\u001b[39m tensor_model_parallel_all_gather(output_parallel)\n",
"File \u001b[0;32m~/miniconda3/envs/vllm_env/lib/python3.11/site-packages/vllm/model_executor/layers/linear.py:135\u001b[0m, in \u001b[0;36mUnquantizedLinearMethod.apply\u001b[0;34m(self, layer, x, bias)\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mapply\u001b[39m(\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 131\u001b[0m layer: torch\u001b[38;5;241m.\u001b[39mnn\u001b[38;5;241m.\u001b[39mModule,\n\u001b[1;32m 132\u001b[0m x: torch\u001b[38;5;241m.\u001b[39mTensor,\n\u001b[1;32m 133\u001b[0m bias: Optional[torch\u001b[38;5;241m.\u001b[39mTensor] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m torch\u001b[38;5;241m.\u001b[39mTensor:\n\u001b[0;32m--> 135\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlinear\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlayer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 47.43 GiB of which 16.69 MiB is free. Including non-PyTorch memory, this process has 47.38 GiB memory in use. Of the allocated memory 46.94 GiB is allocated by PyTorch, with 33.31 MiB allocated in private pools (e.g., CUDA Graphs), and 41.20 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
]
}
],
"source": [
"from huggingface_hub import login\n",
"import os \n",
"import torch\n",
"torch.cuda.empty_cache()\n",
"\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"  # NOTE: must be set before CUDA is first initialized; torch is already imported above, so this may have no effect in this session\n",
"\n",
"# Read the Hugging Face token from the environment instead of hardcoding it in the notebook\n",
"login(os.environ.get(\"HF_TOKEN\", \"\"))\n",
"\n",
"from vllm import LLM, SamplingParams\n",
"\n",
"\n",
"TEXT_LLM_MODEL_MISTRAL='meta-llama/Meta-Llama-3.1-8B-Instruct'  # NOTE: misleading name -- this is a Llama 3.1 model, not Mistral; kept as-is in case later cells reference it\n",
"\n",
"\n",
"\n",
"vllm_model = LLM(\n",
" model=TEXT_LLM_MODEL_MISTRAL,\n",
"    tensor_parallel_size=1, # Single GPU; raise to shard the model across more GPUs\n",
"    gpu_memory_utilization=0.95, # Fraction of GPU memory vLLM may claim; 0.95 is near the maximum -- lower this value if OOM occurs (the output above shows one)\n",
" max_model_len=26000)\n",
" \n",
" \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NER Data Creation"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Collecting matplotlib\n",
" Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)\n",
"Collecting contourpy>=1.0.1 (from matplotlib)\n",
" Downloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.4 kB)\n",
"Collecting cycler>=0.10 (from matplotlib)\n",
" Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\n",
"Collecting fonttools>=4.22.0 (from matplotlib)\n",
" Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (162 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m162.6/162.6 kB\u001b[0m \u001b[31m35.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib)\n",
" Downloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.3 kB)\n",
"Requirement already satisfied: numpy>=1.23 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (1.26.4)\n",
"Requirement already satisfied: packaging>=20.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (24.1)\n",
"Requirement already satisfied: pillow>=8 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (10.4.0)\n",
"Collecting pyparsing>=2.3.1 (from matplotlib)\n",
" Downloading pyparsing-3.1.4-py3-none-any.whl.metadata (5.1 kB)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from matplotlib) (2.9.0)\n",
"Requirement already satisfied: six>=1.5 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from python-dateutil>=2.7->matplotlib) (1.16.0)\n",
"Downloading matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (8.3 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.3/8.3 MB\u001b[0m \u001b[31m99.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mta \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (323 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.2/323.2 kB\u001b[0m \u001b[31m134.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading cycler-0.12.1-py3-none-any.whl (8.3 kB)\n",
"Downloading fonttools-4.53.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.9 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.9/4.9 MB\u001b[0m \u001b[31m100.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
"\u001b[?25hDownloading kiwisolver-1.4.7-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.4/1.4 MB\u001b[0m \u001b[31m122.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading pyparsing-3.1.4-py3-none-any.whl (104 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.1/104.1 kB\u001b[0m \u001b[31m547.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: pyparsing, kiwisolver, fonttools, cycler, contourpy, matplotlib\n",
"Successfully installed contourpy-1.3.0 cycler-0.12.1 fonttools-4.53.1 kiwisolver-1.4.7 matplotlib-3.9.2 pyparsing-3.1.4\n"
]
}
],
"source": [
"%pip install matplotlib\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
"To disable this warning, you can either:\n",
"\t- Avoid using `tokenizers` before the fork if possible\n",
"\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Collecting seqeval\n",
" Downloading seqeval-1.2.2.tar.gz (43 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m43.6/43.6 kB\u001b[0m \u001b[31m249.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.26.4)\n",
"Requirement already satisfied: scikit-learn>=0.21.3 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from seqeval) (1.5.1)\n",
"Requirement already satisfied: scipy>=1.6.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.14.1)\n",
"Requirement already satisfied: joblib>=1.2.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (1.4.2)\n",
"Requirement already satisfied: threadpoolctl>=3.1.0 in /home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages (from scikit-learn>=0.21.3->seqeval) (3.5.0)\n",
"Building wheels for collected packages: seqeval\n",
" Building wheel for seqeval (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16162 sha256=3ad02021d3334b570a31b1504d9d0c4da3569f9ff09c9efb6da8b2b37276ac87\n",
" Stored in directory: /tmp/pip-ephem-wheel-cache-a6hygjhv/wheels/bc/92/f0/243288f899c2eacdfa8c5f9aede4c71a9bad0ee26a01dc5ead\n",
"Successfully built seqeval\n",
"Installing collected packages: seqeval\n",
"Successfully installed seqeval-1.2.2\n"
]
}
],
"source": [
"%pip install seqeval\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import aiohttp\n",
"import asyncio\n",
"import nest_asyncio\n",
"import random\n",
"import re\n",
"pd.set_option('max_colwidth', 2400)\n",
"\n",
"# # Apply the nest_asyncio patch\n",
"# nest_asyncio.apply()\n",
"\n",
"# # Set environment variables\n",
"# OPEN_AI_API_KEY = \"EMPTY\" # Replace with your actual API key\n",
"# OPENAI_API_BASE_MIXTRAL = os.getenv(\"OPENAI_API_BASE_MIXTRAL\", \"http://0.0.0.0:8004/v1\")\n",
"# TEXT_LLM_MODEL_MIXTRAL = 'cognitivecomputations/dolphin-2.8-mistral-7b-v02'\n",
"\n",
"# List of metrics to be included in questions\n",
"metrics = [\n",
" \"Return on Capital Employed\",\n",
" \"Debt ratio\",\n",
" \"Debt-Equity ratio\",\n",
" \"Interest Coverage\",\n",
" \"Dividend Payout ratio\",\n",
" \"PE Ratio\",\n",
" \"PE to Growth ratio\",\n",
" \"Dividend yield\",\n",
" \"Enterprise Value Multiple\",\n",
" \"Dividend yield TTM\",\n",
" \"Dividend yield percentage TTM\",\n",
" \"PE ratio TTM\",\n",
" \"PEF ratio TTM\",\n",
" \"Current ratio TTM\",\n",
" \"Quick ratio TTM\",\n",
" \"Gross Profit Margin TTM\",\n",
" \"Operating Profit Margin TTM\",\n",
" \"Net-Profit Margin TTM\",\n",
" \"Return on Assets TTM\",\n",
" \"Return on Equity TTM\",\n",
" \"Return on Capital Employed TTM\",\n",
" \"Debt ratio TTM\",\n",
" \"Debt-Equity ratio TTM\",\n",
" \"Interest Coverage TTM\",\n",
" \"PE ratio TTM\",\n",
" \"PEG ratio TTM\",\n",
" \"Price to Sales ratio TTM\",\n",
" \"Price to Fair-Value TTM\",\n",
" \"Dividend per share TTM\",\n",
" \"Revenue growth\",\n",
" \"EBITDA growth\",\n",
" \"Net-Income growth\",\n",
" \"Growth in Net-Income-ratio\",\n",
" \"EPS growth\",\n",
" \"Total-Debt growth\",\n",
" \"NetDebt growth\",\n",
" \"Altman Z Score\",\n",
" \"Piotroski Score\",\n",
" \"Working Capital\",\n",
" \"Total Assets\",\n",
" \"Retained Earnings\",\n",
" \"EBIT\",\n",
" \"Total Liabilities\"\n",
"]\n",
"\n",
"# List of entities (companies) to be included in questions\n",
"entities = [\n",
" \"Amazon\", \"Apple\", \"Microsoft\", \"Google\", \"Facebook\", \"Nvidia\",\n",
" \"Meta\", \"Tesla\", \"Broadcom\", \"ASML\", \"Costco\", \"Cisco\", \"Intel\", \"Adobe\"\n",
"]\n",
"\n",
"# New list of example questions\n",
"example_questions = [\n",
" \"What’s is the P/E of CVS compared to competitors\",\n",
" \"Is abercrombie still a good buy\",\n",
" \"Analyse CVS for me\",\n",
" \"How does Arch Resources' return on equity compare to its competitors in the coal mining sector?\",\n",
" \"What is the intrinsic value of Brookfield corporation\",\n",
" \"What is the P/E ratio of Brookfield Corporation?\",\n",
" \"Which companies, similar to Amazon, have a significant presence in both e-commerce and cloud computing services?\",\n",
" \"Which tech-driven businesses, akin to Amazon, provide extensive e-commerce solutions and operate successful cloud computing divisions?\",\n",
" \"Is Dynagas good stock for long run\",\n",
" \"Is Taiwan semiconductor good stock in the long run\",\n",
" \"Will Electric Vehicles outperform the market this year?\",\n",
" \"Analyse latest earning call of Meta\",\n",
" \"How does QSR.TO's market capitalization compare to its peers in the restaurant sector?\",\n",
" \"How have Amazon's focus areas changed?\",\n",
" \"What are the key risks investing in nvidia\",\n",
" \"What is the ROI of Starbucks?\",\n",
" \"What do you think of ticker LNG?\",\n",
" \"Analyze CDW\",\n",
" \"Is it a right time to buy Nvidia stock?\",\n",
" \"Levi stock\",\n",
" \"Tell me about TSLA but talk like a pirate\",\n",
" \"What about BYD?\",\n",
" \"What is Starbucks' return on equity (ROE) compared to its competitors in the food and beverage sector?\",\n",
" \"What is McDonald's ROE compared to Starbucks in the food and beverage sector?\",\n",
" \"How does Starbucks' ROE compare to Dunkin' Donuts in the coffee and bakery sector?\",\n",
" \"Why the company DoubleVerify Holdings shares fell?\",\n",
" \"What are the latest news for AMD?\",\n",
" \"What is the latest close price of NVDA?\",\n",
" \"Why is Tesla so overrated?\",\n",
" \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n",
" \"What are the consensus EPS forecasts for PYPL for the next 3 financial years?\",\n",
" \"What is PYPL EPS in FY24, FY25 and FY26?\",\n",
" \"Give me a chart of coke's P/E ratio for the last 5 years\",\n",
" \"What is the stock price of Apple?\",\n",
" \"Compare AMD and intel fundamentals for the last 5 years\",\n",
" \"Compare the stocks of AMD and intel based on their fundamentals for the last 5 years\",\n",
" \"What is the average return on equity (ROE) for AMD and Intel over the last 5 years?\",\n",
" \"Do a sentiment comparison between intel and amd for the last 2 years\",\n",
" \"How the options are looking for both amd and intel for the last 10 trading days\",\n",
" \"How has the price-to-earnings ratio (P/E) for AMD and Intel compared over the last 10 trading days?\",\n",
" \"Why intel price dropped during 2021 and 2022?\",\n",
" \"How is ASTS?\",\n",
" \"Is MTCH revenue growing?\",\n",
" \"Tell me about amazon\",\n",
" \"Will GME go up or go down today?\",\n",
" \"What are the key factors influencing GME's stock price movement?\",\n",
" \"Berkshire b vs voo\",\n",
" \"Compare Berkshire b with voo for past 20 years performance and report me\",\n",
" \"What are your thoughts on moneylion?\",\n",
" \"Akon\",\n",
" \"Predictions on NVIDIA for tomorrow\",\n",
" \"Value of oracle\",\n",
" \"Tell me about OKLA\",\n",
" \"Is ASTS a good investment?\",\n",
" \"Is smci good? why or why not\",\n",
" \"Should I buy accenture?\",\n",
" \"What is Accenture's return on equity (ROE) compared to its competitors in the IT services industry?\",\n",
" \"Should I invest in Intel intc?\",\n",
" \"What do you think about the ticker ENPH?\",\n",
" \"What is status of apple stocks?\",\n",
" \"Should I buy more apple?\",\n",
" \"What is the average growth rate of HD revenue?\",\n",
" \"What is the average growth rate of Home Depot revenue?\",\n",
" \"I have been tracking the share price of MSI (Motorola Solutions). What are the predictions for the next 6 months?\",\n",
" \"How's uber doing lately?\",\n",
" \"Show me the revenue, income, price graphs for Uber\",\n",
" \"Is ANGELONE a good stock to invest in?\",\n",
" \"Nvidia metrics and comparables\",\n",
" \"How does Nvidia's profitability compare to other semiconductor companies?\",\n",
" \"How does Nvidia's market capitalization compare to other semiconductor companies?\",\n",
" \"What do you think about hapag loyd?\",\n",
" \"Amazon latest results\",\n",
" \"Can you give me important informations for cloudlfare?\",\n",
" \"What is MSFT last 3 year revenue?\",\n",
" \"What is the revenue growth rate of MSFT in the last 3 years?\",\n",
" \"What is asts?\",\n",
" \"Future of polestar\",\n",
" \"What are some key metrics of nvidia over the past few years?\",\n",
" \"What is Nvidia's ROE and ROA over the past few years?\",\n",
" \"Is ford a good investment right now?\",\n",
" \"What are the prospects of growth for paypal?\",\n",
" \"Would Costco be a good investment right now?\",\n",
" \"Costco vs wallmart\",\n",
" \"How do Costco and Walmart compare in terms of their respective market shares in the retail industry?\",\n",
" \"How would you value costco?\",\n",
" \"What is Costco's price-to-earnings ratio compared to other discount retailers?\",\n",
" \"How is Ocugen performing?\",\n",
" \"I meant OCGN\",\n",
" \"What was Apple’s revenue last year?\",\n",
" \"What is the outlook for ARM stock?\",\n",
" \"What is the outlook for ARM stock?\",\n",
" \"Outlook for giig stock in 2025\",\n",
" \"Is SLS a good buy right now?\",\n",
" \"Is AMD a good buy right now?\",\n",
" \"What is happening with Tesla stock?\",\n",
" \"What is the short interest on WIRE?\",\n",
" \"What is the market cap of WIRE?\",\n",
" \"Is AMD good to be invested now or should I vest my RSU now?\",\n",
" \"What are the growth prospects for Intel in the semiconductor industry?\",\n",
" \"NVDA\",\n",
" \"Analyze the stock shw\",\n",
" \"Zts\",\n",
" \"Analyze zts\",\n",
" \"Tell me about the outlook of The Qt Company, QTCOM\",\n",
" \"What is the average revenue growth rate of The Qt Company in the past five years compared to its competitors in the software development industry?\",\n",
" \"META vs MSFT vs NVDA?\",\n",
" \"NVDA\",\n",
" \"How does NVDA's market capitalization compare to its peers in the semiconductor industry?\",\n",
" \"What is JPM’s net interest margin over the last 10 years?\",\n",
" \"How was Q1 of Smart Sand compared to Q4 2023?\",\n",
" \"Price target for NVDA?\",\n",
" \"What is the price target for NVIDIA's competitors like AMD and Intel?\",\n",
" \"Is lulu a good buy?\",\n",
" \"Is nvidia worth buying?\",\n",
" \"What is the market cap of Nvidia compared to its competitors?\",\n",
" \"What stock trend is predicted for nvda?\",\n",
" \"Should I buy calls on NVDA for December 2024?\",\n",
" \"What do you think about SQQQ?\",\n",
" \"What are the main competitors to crocs inc.?\",\n",
" \"What is the long term debt of walgreen boots alliance (WBA)?\",\n",
" \"What is the long term debt of CVS Health (CVS)?\",\n",
" \"What is future of DAX40?\",\n",
" \"What's the intrinsic value of nvidia?\",\n",
" \"What is the market capitalization of Nvidia's competitors?\",\n",
" \"What is the intrinsic value of Amazon?\",\n",
" \"What is the intrinsic value of Walmart?\",\n",
" \"What is Linde dcf value?\",\n",
" \"What can you tell me about linde?\",\n",
" \"What can you tell me about evvty (evolution ab)?\",\n",
" \"Give me the evvty important numbers\",\n",
" \"EVVTY Analysis\",\n",
" \"How does EVVTY's ROE compare to its peers in the industry?\",\n",
" \"What is quarterly sales and earnings of fiserv for last 2 years?\",\n",
" \"What is the average quarterly sales and earnings growth rate of Fiserv and its competitors in the financial technology industry over the last 2 years?\",\n",
" \"Calculate the price action movement for the last 30 days for google\",\n",
" \"What are the top competitors of Albemarle in the chemical industry?\",\n",
" \"What is nvidia?\",\n",
" \"What is Nvidia's primary business?\",\n",
" \"Current risk in Ulta stock?\",\n",
" \"What is the current risk in Ulta's competitors' stocks?\",\n",
" \"What is the forecast of Nano one materials?\"\n",
"]\n",
"\n",
"\n",
"df_list=[]\n",
"# Build one generation prompt per (metric, entity, example-question) combination;\n",
"# each combination is repeated 3 times so the LLM can produce phrasing variants.\n",
"for metric in metrics:\n",
"    for entity in entities:\n",
"        for example_question in example_questions:\n",
"            for _ in range(3):  # Repeat 3 times per combination\n",
"                prompt = f\"\"\" You are an expert stock market analyst tasked with creating questions for training a model. Here are some example questions for reference:\\n\\n{example_questions}\\n\\nNow, generate 1 unique, diverse, and insightful question specifically for the metric '{metric}' and the entity '{entity}'. Ensure the question provides significant analytical depth and covers the key aspects outlined. The question should be less than 20 words and must contain both the metric '{metric}' and the entity '{entity}' exactly as specified. Use the metric '{metric}' and entity '{entity}' exactly as provided in your response, this is very very important; specifically check for this condition every single time before generating a question - like if metric is Operating Cash Flow - it should be used as it is. Start generating the questions directly without any preamble.Provide question in xml tags i.e ....\"\"\"\n",
"                df_list.append({'Example Question': example_question, 'Metric': metric, \"Entity\": entity, 'Prompt': prompt})\n",
"\n",
"\n",
"# Create a DataFrame from the list and subsample to keep generation tractable.\n",
"full_df = pd.DataFrame(df_list).sample(n=25000)\n",
"\n",
"# One LLM generation per sampled row.\n",
"prompts = full_df['Prompt'].tolist()\n",
"\n",
"sampling_params = SamplingParams(\n",
"    temperature=0.7,\n",
"    max_tokens=100\n",
"    )\n",
"\n",
"outputs = vllm_model.generate(prompts, sampling_params)\n",
"\n",
"# Extract the generated question from each completion. The prompt instructs the\n",
"# model to wrap its answer in <question>...</question> XML tags, so parse that\n",
"# span explicitly; rows with no tag are recorded as '' and dropped below.\n",
"llm_output = []\n",
"for output in outputs:\n",
"    generated_text = output.outputs[0].text\n",
"    match = re.search(r'<question>(.*?)</question>', generated_text, re.DOTALL)\n",
"    llm_output.append(match.group(1).strip() if match else '')\n",
"\n",
"\n",
"full_df['Response'] = llm_output\n",
"display(full_df)\n",
"# Keep only rows where a question was successfully extracted.\n",
"full_df = full_df[full_df['Response'] != '']\n",
"\n",
"# Define the directory path and file name\n",
"directory = '/home/ady/Stockbuzz.ai_Data/RouterNER/'\n",
"file_path = os.path.join(directory, 'Ady_User_Database_NER_Entities_Metrics_2.csv')\n",
"\n",
"# Create the output directory if it does not already exist (race-free).\n",
"os.makedirs(directory, exist_ok=True)\n",
"\n",
"# Now save the DataFrame to the CSV file\n",
"full_df.to_csv(file_path, index=False)\n",
"\n",
"print(f\"Synthetic questions dataset created and saved to {file_path}.\")\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NER Model"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Size of train dataset: 19891\n",
"Size of validation dataset: 4975\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training Label Counts:\n",
"Counter({'O': 322182, 'I-METRIC': 31143, 'B-ENTITY': 19886, 'B-METRIC': 16255})\n",
"\n",
"Validation Label Counts:\n",
"Counter({'O': 80848, 'I-METRIC': 7635, 'B-ENTITY': 4970, 'B-METRIC': 4037})\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Map: 100%|██████████| 19891/19891 [00:01<00:00, 13101.13 examples/s]\n",
"Map: 100%|██████████| 4975/4975 [00:00<00:00, 7058.05 examples/s] \n",
"/home/ady/miniconda3/envs/vllm_env/lib/python3.11/site-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
" warnings.warn(\n",
"torch.distributed process group is initialized, but parallel_mode != ParallelMode.DISTRIBUTED. In order to use Torch DDP, launch your script with `python -m torch.distributed.launch\n"
]
},
{
"data": {
"text/html": [
"\n",
" \n",
" \n",
"
\n",
" [ 350/1550 09:57 < 34:19, 0.58 it/s, Epoch 2/10]\n",
"
\n",
" \n",
" \n",
" \n",
" | Step | \n",
" Training Loss | \n",
" Validation Loss | \n",
" Overall Precision | \n",
" Overall Recall | \n",
" Overall F1 | \n",
" Overall Accuracy | \n",
" Entity Precision | \n",
" Entity Recall | \n",
" Entity F1 | \n",
" Entity Accuracy | \n",
" Metric Precision | \n",
" Metric Recall | \n",
" Metric F1 | \n",
" Metric Accuracy | \n",
"
\n",
" \n",
" \n",
" \n",
" | 10 | \n",
" 1.767700 | \n",
" 1.836932 | \n",
" 0.294579 | \n",
" 0.124076 | \n",
" 0.174607 | \n",
" 0.773001 | \n",
" 0.348045 | \n",
" 0.201776 | \n",
" 0.255454 | \n",
" 0.000000 | \n",
" 0.007485 | \n",
" 0.000931 | \n",
" 0.001657 | \n",
" 0.005126 | \n",
"
\n",
" \n",
" | 20 | \n",
" 1.659600 | \n",
" 1.680501 | \n",
" 0.293320 | \n",
" 0.124219 | \n",
" 0.174527 | \n",
" 0.773039 | \n",
" 0.348308 | \n",
" 0.202010 | \n",
" 0.255712 | \n",
" 0.000000 | \n",
" 0.007236 | \n",
" 0.000931 | \n",
" 0.001650 | \n",
" 0.006003 | \n",
"
\n",
" \n",
" | 30 | \n",
" 1.514500 | \n",
" 1.429845 | \n",
" 0.284166 | \n",
" 0.124219 | \n",
" 0.172870 | \n",
" 0.773452 | \n",
" 0.348168 | \n",
" 0.202010 | \n",
" 0.255675 | \n",
" 0.000000 | \n",
" 0.006010 | \n",
" 0.000931 | \n",
" 0.001613 | \n",
" 0.013421 | \n",
"
\n",
" \n",
" | 40 | \n",
" 1.287100 | \n",
" 1.152468 | \n",
" 0.219771 | \n",
" 0.126589 | \n",
" 0.160645 | \n",
" 0.799228 | \n",
" 0.347879 | \n",
" 0.202126 | \n",
" 0.255690 | \n",
" 0.000234 | \n",
" 0.015226 | \n",
" 0.006893 | \n",
" 0.009490 | \n",
" 0.217846 | \n",
"
\n",
" \n",
" | 50 | \n",
" 1.047800 | \n",
" 0.888566 | \n",
" 0.251184 | \n",
" 0.152366 | \n",
" 0.189676 | \n",
" 0.810015 | \n",
" 0.357186 | \n",
" 0.209370 | \n",
" 0.263995 | \n",
" 0.014488 | \n",
" 0.123117 | \n",
" 0.062407 | \n",
" 0.082829 | \n",
" 0.299521 | \n",
"
\n",
" \n",
" | 60 | \n",
" 0.781200 | \n",
" 0.600187 | \n",
" 0.709895 | \n",
" 0.603719 | \n",
" 0.652516 | \n",
" 0.884783 | \n",
" 0.959285 | \n",
" 0.922187 | \n",
" 0.940371 | \n",
" 0.838766 | \n",
" 0.179177 | \n",
" 0.096498 | \n",
" 0.125439 | \n",
" 0.358333 | \n",
"
\n",
" \n",
" | 70 | \n",
" 0.541900 | \n",
" 0.364509 | \n",
" 0.764858 | \n",
" 0.680118 | \n",
" 0.720003 | \n",
" 0.906769 | \n",
" 0.993325 | \n",
" 0.991004 | \n",
" 0.992163 | \n",
" 0.988784 | \n",
" 0.319677 | \n",
" 0.184613 | \n",
" 0.234058 | \n",
" 0.431712 | \n",
"
\n",
" \n",
" | 80 | \n",
" 0.352200 | \n",
" 0.244981 | \n",
" 0.827778 | \n",
" 0.832771 | \n",
" 0.830267 | \n",
" 0.943034 | \n",
" 0.995204 | \n",
" 0.994041 | \n",
" 0.994622 | \n",
" 0.993691 | \n",
" 0.689579 | \n",
" 0.583085 | \n",
" 0.631876 | \n",
" 0.707830 | \n",
"
\n",
" \n",
" | 90 | \n",
" 0.246500 | \n",
" 0.153416 | \n",
" 0.854047 | \n",
" 0.877289 | \n",
" 0.865512 | \n",
" 0.962899 | \n",
" 0.998714 | \n",
" 0.998131 | \n",
" 0.998422 | \n",
" 0.998014 | \n",
" 0.751921 | \n",
" 0.692809 | \n",
" 0.721156 | \n",
" 0.829770 | \n",
"
\n",
" \n",
" | 100 | \n",
" 0.168000 | \n",
" 0.103761 | \n",
" 0.901217 | \n",
" 0.936095 | \n",
" 0.918325 | \n",
" 0.976152 | \n",
" 0.999182 | \n",
" 0.999065 | \n",
" 0.999124 | \n",
" 0.998948 | \n",
" 0.891855 | \n",
" 0.846498 | \n",
" 0.868585 | \n",
" 0.920213 | \n",
"
\n",
" \n",
" | 110 | \n",
" 0.115500 | \n",
" 0.064440 | \n",
" 0.919706 | \n",
" 0.961442 | \n",
" 0.940111 | \n",
" 0.985192 | \n",
" 0.999883 | \n",
" 0.999883 | \n",
" 0.999883 | \n",
" 0.999766 | \n",
" 0.935521 | \n",
" 0.913562 | \n",
" 0.924411 | \n",
" 0.960478 | \n",
"
\n",
" \n",
" | 120 | \n",
" 0.073600 | \n",
" 0.036066 | \n",
" 0.952067 | \n",
" 0.975515 | \n",
" 0.963649 | \n",
" 0.991613 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.955869 | \n",
" 0.948212 | \n",
" 0.952025 | \n",
" 0.976799 | \n",
"
\n",
" \n",
" | 130 | \n",
" 0.051400 | \n",
" 0.026381 | \n",
" 0.965401 | \n",
" 0.985711 | \n",
" 0.975450 | \n",
" 0.993743 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.980513 | \n",
" 0.974851 | \n",
" 0.977674 | \n",
" 0.987860 | \n",
"
\n",
" \n",
" | 140 | \n",
" 0.040200 | \n",
" 0.022301 | \n",
" 0.972017 | \n",
" 0.992676 | \n",
" 0.982238 | \n",
" 0.995115 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.992896 | \n",
" 0.989382 | \n",
" 0.991136 | \n",
" 0.994537 | \n",
"
\n",
" \n",
" | 150 | \n",
" 0.031300 | \n",
" 0.016631 | \n",
" 0.977112 | \n",
" 0.993179 | \n",
" 0.985080 | \n",
" 0.996085 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.992331 | \n",
" 0.988264 | \n",
" 0.990293 | \n",
" 0.993997 | \n",
"
\n",
" \n",
" | 160 | \n",
" 0.025900 | \n",
" 0.015102 | \n",
" 0.979862 | \n",
" 0.995692 | \n",
" 0.987713 | \n",
" 0.996622 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.997385 | \n",
" 0.994784 | \n",
" 0.996083 | \n",
" 0.997167 | \n",
"
\n",
" \n",
" | 170 | \n",
" 0.021500 | \n",
" 0.010109 | \n",
" 0.990361 | \n",
" 0.995979 | \n",
" 0.993162 | \n",
" 0.998167 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.996267 | \n",
" 0.994225 | \n",
" 0.995245 | \n",
" 0.996965 | \n",
"
\n",
" \n",
" | 180 | \n",
" 0.015400 | \n",
" 0.006890 | \n",
" 0.995055 | \n",
" 0.996912 | \n",
" 0.995983 | \n",
" 0.998935 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.996453 | \n",
" 0.994411 | \n",
" 0.995431 | \n",
" 0.996965 | \n",
"
\n",
" \n",
" | 190 | \n",
" 0.013000 | \n",
" 0.007091 | \n",
" 0.994557 | \n",
" 0.997200 | \n",
" 0.995877 | \n",
" 0.998935 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999440 | \n",
" 0.997392 | \n",
" 0.998415 | \n",
" 0.998516 | \n",
"
\n",
" \n",
" | 200 | \n",
" 0.009600 | \n",
" 0.005577 | \n",
" 0.996559 | \n",
" 0.998133 | \n",
" 0.997345 | \n",
" 0.999251 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999067 | \n",
" 0.997206 | \n",
" 0.998135 | \n",
" 0.998584 | \n",
"
\n",
" \n",
" | 210 | \n",
" 0.009400 | \n",
" 0.004625 | \n",
" 0.997203 | \n",
" 0.998420 | \n",
" 0.997811 | \n",
" 0.999357 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.998134 | \n",
" 0.996647 | \n",
" 0.997390 | \n",
" 0.998314 | \n",
"
\n",
" \n",
" | 220 | \n",
" 0.009500 | \n",
" 0.005023 | \n",
" 0.997061 | \n",
" 0.998779 | \n",
" 0.997920 | \n",
" 0.999319 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999254 | \n",
" 0.997765 | \n",
" 0.998509 | \n",
" 0.998921 | \n",
"
\n",
" \n",
" | 230 | \n",
" 0.006500 | \n",
" 0.003757 | \n",
" 0.997704 | \n",
" 0.998420 | \n",
" 0.998062 | \n",
" 0.999463 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.998134 | \n",
" 0.996647 | \n",
" 0.997390 | \n",
" 0.998381 | \n",
"
\n",
" \n",
" | 240 | \n",
" 0.005100 | \n",
" 0.003936 | \n",
" 0.997777 | \n",
" 0.999138 | \n",
" 0.998457 | \n",
" 0.999453 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999813 | \n",
" 0.998510 | \n",
" 0.999161 | \n",
" 0.999123 | \n",
"
\n",
" \n",
" | 250 | \n",
" 0.005600 | \n",
" 0.003105 | \n",
" 0.998493 | \n",
" 0.998779 | \n",
" 0.998636 | \n",
" 0.999568 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999440 | \n",
" 0.997578 | \n",
" 0.998508 | \n",
" 0.998651 | \n",
"
\n",
" \n",
" | 260 | \n",
" 0.005700 | \n",
" 0.003194 | \n",
" 0.998135 | \n",
" 0.999138 | \n",
" 0.998636 | \n",
" 0.999511 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999813 | \n",
" 0.998510 | \n",
" 0.999161 | \n",
" 0.999123 | \n",
"
\n",
" \n",
" | 270 | \n",
" 0.004500 | \n",
" 0.002846 | \n",
" 0.998493 | \n",
" 0.999210 | \n",
" 0.998852 | \n",
" 0.999616 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999814 | \n",
" 0.998696 | \n",
" 0.999254 | \n",
" 0.999258 | \n",
"
\n",
" \n",
" | 280 | \n",
" 0.005000 | \n",
" 0.002617 | \n",
" 0.998565 | \n",
" 0.999210 | \n",
" 0.998887 | \n",
" 0.999655 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999441 | \n",
" 0.998510 | \n",
" 0.998975 | \n",
" 0.999258 | \n",
"
\n",
" \n",
" | 290 | \n",
" 0.003400 | \n",
" 0.002572 | \n",
" 0.998278 | \n",
" 0.999138 | \n",
" 0.998708 | \n",
" 0.999626 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999254 | \n",
" 0.998510 | \n",
" 0.998882 | \n",
" 0.999326 | \n",
"
\n",
" \n",
" | 300 | \n",
" 0.005100 | \n",
" 0.002382 | \n",
" 0.998278 | \n",
" 0.999210 | \n",
" 0.998744 | \n",
" 0.999664 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999068 | \n",
" 0.998696 | \n",
" 0.998882 | \n",
" 0.999393 | \n",
"
\n",
" \n",
" | 310 | \n",
" 0.005600 | \n",
" 0.002559 | \n",
" 0.998135 | \n",
" 0.999354 | \n",
" 0.998744 | \n",
" 0.999655 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999069 | \n",
" 0.999069 | \n",
" 0.999069 | \n",
" 0.999528 | \n",
"
\n",
" \n",
" | 320 | \n",
" 0.005000 | \n",
" 0.002000 | \n",
" 0.998923 | \n",
" 0.999138 | \n",
" 0.999031 | \n",
" 0.999674 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999441 | \n",
" 0.998323 | \n",
" 0.998882 | \n",
" 0.998988 | \n",
"
\n",
" \n",
" | 330 | \n",
" 0.003600 | \n",
" 0.001996 | \n",
" 0.999067 | \n",
" 0.999138 | \n",
" 0.999102 | \n",
" 0.999664 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999942 | \n",
" 0.999766 | \n",
" 0.999627 | \n",
" 0.998137 | \n",
" 0.998881 | \n",
" 0.998853 | \n",
"
\n",
" \n",
" | 340 | \n",
" 0.004300 | \n",
" 0.002259 | \n",
" 0.998350 | \n",
" 0.999067 | \n",
" 0.998708 | \n",
" 0.999635 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999068 | \n",
" 0.998137 | \n",
" 0.998602 | \n",
" 0.999056 | \n",
"
\n",
" \n",
" | 350 | \n",
" 0.002900 | \n",
" 0.002071 | \n",
" 0.998923 | \n",
" 0.999210 | \n",
" 0.999067 | \n",
" 0.999674 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 0.999883 | \n",
" 0.999627 | \n",
" 0.998323 | \n",
" 0.998975 | \n",
" 0.998988 | \n",
"
\n",
" \n",
"
"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n",
"\n",
"import re\n",
"import numpy as np\n",
"import torch\n",
"import pandas as pd\n",
"import time\n",
"from datasets import Dataset, load_metric\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer, DataCollatorForTokenClassification, TrainerCallback, EarlyStoppingCallback\n",
"from transformers.trainer_utils import IntervalStrategy\n",
"import matplotlib.pyplot as plt\n",
"from collections import Counter\n",
"import torch.nn as nn\n",
"\n",
"pd.set_option('display.width', 1000)\n",
"\n",
"# Define the label mapping\n",
"labels = [\"O\", \"B-ENTITY\", \"I-ENTITY\", \"B-METRIC\", \"I-METRIC\"]\n",
"label2id = {label: i for i, label in enumerate(labels)}\n",
"id2label = {i: label for i, label in enumerate(labels)}\n",
"\n",
"# Load the dataset\n",
"df_input_consolidated = pd.read_csv('/home/ady/Stockbuzz.ai_Data/RouterNER/Ady_User_Database_NER_Entities_Metrics_2.csv')\n",
"\n",
"df_input_consolidated = df_input_consolidated[df_input_consolidated['Response'] != 'Error: ']\n",
"df_input_consolidated['Response'] = df_input_consolidated['Response'].str.lower()\n",
"df_input_consolidated['Metric'] = df_input_consolidated['Metric'].str.lower()\n",
"df_input_consolidated['Entity'] = df_input_consolidated['Entity'].str.lower()\n",
"\n",
"train_set = int(0.8 * len(df_input_consolidated))\n",
"df_train = df_input_consolidated[:train_set][['Response', 'Metric', 'Entity']]\n",
"df_val = df_input_consolidated[train_set:][['Response', 'Metric', 'Entity']]\n",
"\n",
"# Function to tokenize sentences and assign NER tags\n",
"def tag_entities_and_metrics(sentence, metric, entity):\n",
" words = re.findall(r\"\\w+|[.,!?;'-]|\\b's\\b\", sentence)\n",
" tags = ['O'] * len(words)\n",
"\n",
" # Tagging the metric\n",
" metric_tokens = metric.split()\n",
" metric_len = len(metric_tokens)\n",
" for i in range(len(words) - metric_len + 1):\n",
" if words[i:i + metric_len] == metric_tokens:\n",
" tags[i] = 'B-METRIC'\n",
" for j in range(1, metric_len):\n",
" tags[i + j] = 'I-METRIC'\n",
"\n",
" # Tagging the entity\n",
" entity_tokens = entity.split()\n",
" entity_len = len(entity_tokens)\n",
" for i in range(len(words) - entity_len + 1):\n",
" if words[i:i + entity_len] == entity_tokens:\n",
" tags[i] = 'B-ENTITY'\n",
" for j in range(1, entity_len):\n",
" tags[i + j] = 'I-ENTITY'\n",
"\n",
" return words, tags\n",
"\n",
"# Function to process dataframe and prepare datasets\n",
"def prepare_dataset(df):\n",
"    \"\"\"Tokenize/tag each row and return a HuggingFace Dataset.\n",
"\n",
"    Adds 'tokens' and 'ner_tags' (label-id) columns, drops rows where no\n",
"    entity/metric was matched (all-'O' rows carry no training signal), and\n",
"    converts the result to a datasets.Dataset.\n",
"    \"\"\"\n",
"    # Work on a copy: df_train/df_val are slices of the consolidated frame,\n",
"    # and assigning columns on a slice raises SettingWithCopyWarning and can\n",
"    # silently fail to write through.\n",
"    df = df.copy()\n",
"    df['tokens_and_tags'] = df.apply(lambda row: tag_entities_and_metrics(row['Response'], row['Metric'], row['Entity']), axis=1)\n",
"    df['tokens'] = df['tokens_and_tags'].apply(lambda x: x[0])\n",
"    df['ner_tags'] = df['tokens_and_tags'].apply(lambda x: x[1])\n",
"    df = df.drop(columns=['tokens_and_tags'])\n",
"    df = df[df['ner_tags'].apply(lambda tags: not all(tag == 'O' for tag in tags))]\n",
"    df['ner_tags'] = df['ner_tags'].apply(lambda tags: [label2id[tag] for tag in tags])\n",
"    return Dataset.from_pandas(df)\n",
"\n",
"# Prepare datasets\n",
"train_dataset = prepare_dataset(df_train)\n",
"val_dataset = prepare_dataset(df_val)\n",
"\n",
"# Print the size of the train and validation datasets\n",
"print(f\"Size of train dataset: {len(train_dataset)}\")\n",
"print(f\"Size of validation dataset: {len(val_dataset)}\")\n",
"\n",
"# Count the occurrences of each label in the training and validation sets\n",
"def count_labels(dataset):\n",
" label_counts = Counter()\n",
" for example in dataset:\n",
" labels = example['ner_tags']\n",
" for label in labels:\n",
" label_counts[id2label[label]] += 1\n",
" return label_counts\n",
"\n",
"train_label_counts = count_labels(train_dataset)\n",
"val_label_counts = count_labels(val_dataset)\n",
"\n",
"print(\"Training Label Counts:\")\n",
"print(train_label_counts)\n",
"\n",
"print(\"\\nValidation Label Counts:\")\n",
"print(val_label_counts)\n",
"\n",
"# Tokenization and Alignment Functions\n",
"checkpoint = 'Jean-Baptiste/roberta-large-ner-english'\n",
"tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
"\n",
"def align_labels(labels, word_ids):\n",
" aligned_labels = []\n",
" prev_word_id = None\n",
" for word_id in word_ids:\n",
" if word_id is None:\n",
" aligned_labels.append(-100)\n",
" elif word_id != prev_word_id:\n",
" aligned_labels.append(labels[word_id])\n",
" else:\n",
" aligned_labels.append(labels[word_id] if labels[word_id] != 0 else -100)\n",
" prev_word_id = word_id\n",
" return aligned_labels\n",
"\n",
"def tokenize_and_align_labels(examples):\n",
" tokenized_inputs = tokenizer(examples['tokens'], truncation=True, max_length=32, is_split_into_words=True)\n",
" all_labels = examples['ner_tags']\n",
" new_labels = [align_labels(labels, tokenized_inputs.word_ids(i)) for i, labels in enumerate(all_labels)]\n",
" tokenized_inputs['labels'] = new_labels\n",
" return tokenized_inputs\n",
"\n",
"tokenized_train_dataset = train_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n",
"tokenized_val_dataset = val_dataset.map(tokenize_and_align_labels, batched=True, remove_columns=['Response', 'Metric', 'Entity'])\n",
"\n",
"# Define the data collator\n",
"data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)\n",
"\n",
"# Load metric\n",
"metric = load_metric('seqeval',trust_remote_code=True)\n",
"\n",
"# Custom loss function to give higher weight to metrics\n",
"class WeightedLoss(nn.CrossEntropyLoss):\n",
" def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'):\n",
" super(WeightedLoss, self).__init__(weight, size_average, ignore_index, reduce, reduction)\n",
"\n",
" def forward(self, input, target):\n",
" if self.weight is not None:\n",
" assert self.weight.dim() == 1\n",
" input = input * self.weight.unsqueeze(0).expand_as(input)\n",
" return super(WeightedLoss, self).forward(input, target)\n",
"\n",
"# Custom Trainer to use the weighted loss\n",
"class CustomTrainer(Trainer):\n",
" def compute_loss(self, model, inputs, return_outputs=False):\n",
" labels = inputs.get(\"labels\")\n",
" outputs = model(**inputs)\n",
" logits = outputs.get(\"logits\")\n",
" # Define the weights, giving higher weight to METRIC labels\n",
" class_weights = torch.tensor([0.4, 0.5, 0.5, 1.0, 1.0], device=logits.device)\n",
" loss_fct = WeightedLoss(weight=class_weights)\n",
" loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))\n",
" return (loss, outputs) if return_outputs else loss\n",
"\n",
"# Function to compute metrics\n",
"def compute_metrics(p):\n",
" predictions, labels = p\n",
" predictions = np.argmax(predictions, axis=2)\n",
" true_labels = [[id2label[label] for label in label_set if label != -100] for label_set in labels]\n",
" true_predictions = [[id2label[pred] for pred, label in zip(pred_set, label_set) if label != -100] for pred_set, label_set in zip(predictions, labels)]\n",
" results = metric.compute(predictions=true_predictions, references=true_labels, zero_division=0)\n",
"\n",
" # Calculate metrics for ENTITY\n",
" entity_results = metric.compute(\n",
" predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for pred_set, label_set in zip(true_predictions, true_labels)],\n",
" references=[[true for true in label_set if true.startswith('B-ENTITY') or true.startswith('I-ENTITY')] for label_set in true_labels],\n",
" zero_division=0\n",
" )\n",
"\n",
" # Calculate metrics for METRIC\n",
" metric_results = metric.compute(\n",
" predictions=[[pred for pred, true in zip(pred_set, label_set) if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for pred_set, label_set in zip(true_predictions, true_labels)],\n",
" references=[[true for true in label_set if true.startswith('B-METRIC') or true.startswith('I-METRIC')] for label_set in true_labels],\n",
" zero_division=0\n",
" )\n",
"\n",
" return {\n",
" \"overall_precision\": results[\"overall_precision\"],\n",
" \"overall_recall\": results[\"overall_recall\"],\n",
" \"overall_f1\": results[\"overall_f1\"],\n",
" \"overall_accuracy\": results[\"overall_accuracy\"],\n",
" \"entity_precision\": entity_results[\"overall_precision\"],\n",
" \"entity_recall\": entity_results[\"overall_recall\"],\n",
" \"entity_f1\": entity_results[\"overall_f1\"],\n",
" \"entity_accuracy\": entity_results[\"overall_accuracy\"],\n",
" \"metric_precision\": metric_results[\"overall_precision\"],\n",
" \"metric_recall\": metric_results[\"overall_recall\"],\n",
" \"metric_f1\": metric_results[\"overall_f1\"],\n",
" \"metric_accuracy\": metric_results[\"overall_accuracy\"],\n",
" }\n",
"\n",
"# Load the model\n",
"model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels=len(labels), id2label=id2label, label2id=label2id,ignore_mismatched_sizes=True)\n",
"\n",
"# Custom callback to capture loss history\n",
"class LossHistoryCallback(TrainerCallback):\n",
" def __init__(self):\n",
" self.losses = []\n",
" self.eval_losses = []\n",
"\n",
" def on_log(self, args, state, control, logs=None, **kwargs):\n",
" if logs is not None:\n",
" if 'loss' in logs:\n",
" self.losses.append(logs['loss'])\n",
" if 'eval_loss' in logs:\n",
" self.eval_losses.append(logs['eval_loss'])\n",
"\n",
"loss_history_callback = LossHistoryCallback()\n",
"\n",
"# Training arguments\n",
"training_args = TrainingArguments(\n",
" output_dir='./results',\n",
" evaluation_strategy=IntervalStrategy.STEPS,\n",
" save_strategy=\"steps\",\n",
" logging_strategy=\"steps\",\n",
" learning_rate=1e-5,\n",
" num_train_epochs=10,\n",
" per_device_train_batch_size=32,\n",
" per_device_eval_batch_size=32, \n",
" gradient_accumulation_steps=4, # Accumulate gradients over 4 steps\n",
" warmup_steps=500,\n",
" weight_decay=0.01,\n",
" logging_dir='./logs',\n",
" logging_steps=10,\n",
" eval_steps=10,\n",
" save_steps=10,\n",
" load_best_model_at_end=True,\n",
" save_total_limit=1,\n",
")\n",
"\n",
"# Custom Trainer instance\n",
"trainer = CustomTrainer(\n",
" model=model,\n",
" args=training_args,\n",
" train_dataset=tokenized_train_dataset,\n",
" eval_dataset=tokenized_val_dataset,\n",
" tokenizer=tokenizer,\n",
" data_collator=data_collator,\n",
" compute_metrics=compute_metrics,\n",
" callbacks=[EarlyStoppingCallback(early_stopping_patience=2), loss_history_callback]\n",
")\n",
"\n",
"# Train the model\n",
"trainer.train()\n",
"\n",
"# Save the model\n",
"trainer.save_model('/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model')\n",
"\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"NER Inference"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Entities and metrics and their average scores for text: 'What is the EPS for Apple from 2023, compare it against Intel?'\n",
"Predicted Entities: {'apple': [0.9999647]}\n",
"Predicted Metrics: {'eps': [0.99446386]}\n",
"\n",
"==================================================\n",
"\n",
"Entities and metrics and their average scores for text: 'What are the REVENUES of Amazon?'\n",
"Predicted Entities: {'amazon': [0.9999809]}\n",
"Predicted Metrics: {}\n",
"\n",
"==================================================\n",
"\n",
"Entities and metrics and their average scores for text: 'What is the annual income for Microsoft vs NVDA?'\n",
"Predicted Entities: {'microsoft': [0.9999985]}\n",
"Predicted Metrics: {}\n",
"\n",
"==================================================\n",
"\n",
"Entities and metrics and their average scores for text: 'What is the depreciation for ShaktiPumps in 2023?'\n",
"Predicted Entities: {}\n",
"Predicted Metrics: {}\n",
"\n",
"==================================================\n",
"\n",
"Entities and metrics and their average scores for text: 'What is the ttm net profits for Amazon in 2023?'\n",
"Predicted Entities: {'amazon': [0.9999693]}\n",
"Predicted Metrics: {'tm': [0.5082908]}\n",
"\n",
"==================================================\n",
"\n"
]
}
],
"source": [
"import time\n",
"import numpy as np\n",
"from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline\n",
"\n",
"checkpoint = \"ner_model\"\n",
"tokenizer_ner = AutoTokenizer.from_pretrained(checkpoint)\n",
"model_ner = AutoModelForTokenClassification.from_pretrained(checkpoint)\n",
"\n",
"# Initialize the NER pipeline without additional parameters\n",
"ner_model = pipeline('ner', model=model_ner, tokenizer=tokenizer_ner, aggregation_strategy='simple')\n",
"\n",
"# Enhanced post-processing\n",
"def extract_entities_with_scores(example_text):\n",
"    \"\"\"Run the NER pipeline on `example_text` and merge adjacent spans.\n",
"\n",
"    Returns (entities, metrics): dicts mapping the surface text of each\n",
"    ENTITY / METRIC span to a list of average confidence scores (one entry\n",
"    per occurrence). Consecutive pipeline groups of the same type that touch\n",
"    (offset gap 0) or are separated by exactly one character (gap 1, e.g. a\n",
"    space) are merged into a single span before scoring.\n",
"    \"\"\"\n",
"    ner_results = ner_model(example_text)\n",
"\n",
"    entities = {}\n",
"    metrics = {}\n",
"    current_entity = None\n",
"\n",
"    def _record(span):\n",
"        # Record a finished span into entities/metrics, keyed by its text,\n",
"        # with the mean of its per-group scores appended per occurrence.\n",
"        name = span['word']\n",
"        average_score = np.mean(span['scores'])\n",
"        if span['entity_group'] == 'ENTITY':\n",
"            entities.setdefault(name, []).append(average_score)\n",
"        elif span['entity_group'] == 'METRIC':\n",
"            metrics.setdefault(name, []).append(average_score)\n",
"\n",
"    for result in ner_results:\n",
"        entity_type = result['entity_group']\n",
"        start, end = result['start'], result['end']\n",
"        score = result['score']\n",
"        word = example_text[start:end].strip()\n",
"\n",
"        # Extend the current span when the next group has the same type and\n",
"        # is adjacent or one character away; otherwise flush and start fresh.\n",
"        if current_entity and current_entity['entity_group'] == entity_type and (current_entity['end'] == start or current_entity['end'] + 1 == start):\n",
"            current_entity['word'] += ' ' + word if current_entity['end'] + 1 == start else word\n",
"            current_entity['scores'].append(score)\n",
"            current_entity['end'] = end\n",
"        else:\n",
"            if current_entity:\n",
"                _record(current_entity)\n",
"            current_entity = {'entity_group': entity_type, 'word': word, 'scores': [score], 'start': start, 'end': end}\n",
"\n",
"    # Flush the trailing span, if any.\n",
"    if current_entity:\n",
"        _record(current_entity)\n",
"\n",
"    return entities, metrics\n",
"\n",
"# Test the model with example texts\n",
"example_texts = [\n",
" \"What is the EPS for Apple from 2023, compare it against Intel?\",\n",
" \"What are the REVENUES of Amazon?\",\n",
" \"What is the annual income for Microsoft vs NVDA?\",\n",
" \"What is the depreciation for ShaktiPumps in 2023?\",\n",
" \"What is the ttm net profits for Amazon in 2023?\"\n",
"]\n",
"\n",
"## Test set \n",
"# example_texts = df_val.sample(n=5)['Response'].values.tolist()\n",
"\n",
"\n",
"for text in example_texts:\n",
" entities, metrics = extract_entities_with_scores(text.lower())\n",
"\n",
" print(f\"Entities and metrics and their average scores for text: '{text}'\")\n",
" print(f\"Predicted Entities: {entities}\")\n",
" print(f\"Predicted Metrics: {metrics}\")\n",
" print(\"\\n\" + \"=\"*50 + \"\\n\")\n"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Metric\n",
"pe ratio ttm 1156\n",
"return on assets ttm 635\n",
"dividend yield 615\n",
"piotroski score 612\n",
"return on capital employed 611\n",
"price to fair-value ttm 610\n",
"net-profit margin ttm 605\n",
"debt-equity ratio ttm 603\n",
"debt ratio ttm 598\n",
"operating profit margin ttm 597\n",
"total-debt growth 595\n",
"retained earnings 594\n",
"altman z score 591\n",
"pe ratio 589\n",
"ebit 589\n",
"dividend yield ttm 589\n",
"return on capital employed ttm 588\n",
"netdebt growth 584\n",
"growth in net-income-ratio 582\n",
"dividend yield percentage ttm 581\n",
"gross profit margin ttm 578\n",
"interest coverage ttm 578\n",
"peg ratio ttm 577\n",
"pe to growth ratio 576\n",
"dividend payout ratio 575\n",
"ebitda growth 574\n",
"pef ratio ttm 572\n",
"working capital 571\n",
"debt-equity ratio 571\n",
"revenue growth 571\n",
"eps growth 571\n",
"enterprise value multiple 566\n",
"current ratio ttm 562\n",
"total liabilities 562\n",
"dividend per share ttm 560\n",
"interest coverage 558\n",
"net-income growth 557\n",
"price to sales ratio ttm 555\n",
"quick ratio ttm 551\n",
"return on equity ttm 550\n",
"debt ratio 546\n",
"total assets 539\n",
"Name: count, dtype: int64"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the label distribution of the consolidated 'Metric' column\n",
"# (class balance check for the metric-classification training data).\n",
"df_input_consolidated['Metric'].value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Upload the trained Router, SubRouter, and NER models to the Hugging Face Hub"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Router model"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Upload the Router model to the Hugging Face Hub\n",
"\n",
"import os\n",
"\n",
"from huggingface_hub import login, upload_folder\n",
"\n",
"# Never hardcode tokens in a notebook: read from the environment instead\n",
"# (export HF_TOKEN before starting the kernel).\n",
"login(token=os.environ[\"HF_TOKEN\"])\n",
"\n",
"# Local directory containing the saved model and target repo.\n",
"# NOTE(review): absolute local path — consider a configurable base dir.\n",
"local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/Router_Models/\"\n",
"repo_id = \"Aditiyadav/Router\"\n",
"\n",
"# Push the whole folder as one commit.\n",
"upload_folder(\n",
"    repo_id=repo_id,\n",
"    folder_path=local_model_path,\n",
"    commit_message=\"Uploading model from local directory\"\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Upload the SubRouter model to the Hugging Face Hub\n",
"\n",
"import os\n",
"\n",
"from huggingface_hub import login, upload_folder\n",
"\n",
"# Never hardcode tokens in a notebook: read from the environment instead\n",
"# (export HF_TOKEN before starting the kernel).\n",
"login(token=os.environ[\"HF_TOKEN\"])\n",
"\n",
"# Local directory containing the saved model and target repo.\n",
"# NOTE(review): absolute local path — consider a configurable base dir.\n",
"local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/SubRouter_Models/\"\n",
"repo_id = \"Aditiyadav/SubRouter\"\n",
"\n",
"# Push the whole folder as one commit.\n",
"upload_folder(\n",
"    repo_id=repo_id,\n",
"    folder_path=local_model_path,\n",
"    commit_message=\"Uploading model from local directory\"\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"## Upload the NER model to the Hugging Face Hub\n",
"\n",
"import os\n",
"\n",
"from huggingface_hub import login, upload_folder\n",
"\n",
"# Never hardcode tokens in a notebook: read from the environment instead\n",
"# (export HF_TOKEN before starting the kernel).\n",
"login(token=os.environ[\"HF_TOKEN\"])\n",
"\n",
"# Local directory containing the saved model and target repo.\n",
"# NOTE(review): absolute local path — consider a configurable base dir.\n",
"local_model_path = \"/home/ady/Stockbuzz.ai_Models/RouterNER/ner_model/\"\n",
"repo_id = \"Aditiyadav/NER\"\n",
"\n",
"# Push the whole folder as one commit.\n",
"upload_folder(\n",
"    repo_id=repo_id,\n",
"    folder_path=local_model_path,\n",
"    commit_message=\"Uploading model from local directory\"\n",
")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "gpu_jan12",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 2
}