{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "5f93b7d1", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:37:58.711225Z", "start_time": "2023-05-30T08:37:56.881307Z" } }, "outputs": [], "source": [ "import os\n", "\n", "import torch\n", "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup\n", "from peft import get_peft_model, PromptTuningConfig, TaskType, PromptTuningInit\n", "from torch.utils.data import DataLoader\n", "from tqdm import tqdm\n", "from datasets import load_dataset\n", "\n", "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n", "\n", "# Pick the compute device. torch.accelerator only exists on newer PyTorch builds and\n", "# current_accelerator() can return None, so check availability and fall back to CPU\n", "# instead of crashing (or blindly assuming CUDA) on machines without an accelerator.\n", "if hasattr(torch, \"accelerator\") and torch.accelerator.is_available():\n", "    device = torch.accelerator.current_accelerator().type\n", "elif torch.cuda.is_available():\n", "    device = \"cuda\"\n", "else:\n", "    device = \"cpu\"\n", "\n", "# Base checkpoint and tokenizer identifiers\n", "model_name_or_path = \"t5-large\"\n", "tokenizer_name_or_path = \"t5-large\"\n", "\n", "# Run configuration: checkpoint file name, dataset columns and hyperparameters\n", "checkpoint_name = \"financial_sentiment_analysis_prompt_tuning_v1.pt\"\n", "text_column = \"sentence\"\n", "label_column = \"text_label\"\n", "max_length = 128\n", "lr = 1\n", "num_epochs = 5\n", "batch_size = 8" ] }, { "cell_type": "code", "execution_count": 2, "id": "8d0850ac", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:38:12.413984Z", "start_time": "2023-05-30T08:38:04.601042Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "trainable params: 40,960 || all params: 737,709,056 || trainable%: 0.0056\n" ] }, { "data": { "text/plain": [ "PeftModelForSeq2SeqLM(\n", " (base_model): T5ForConditionalGeneration(\n", " (shared): Embedding(32128, 1024)\n", " (encoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 1024)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, 
out_features=1024, bias=False)\n", " (relative_attention_bias): Embedding(32, 16)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1-23): 23 x T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (decoder): T5Stack(\n", " (embed_tokens): Embedding(32128, 1024)\n", " (block): ModuleList(\n", " (0): T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, 
bias=False)\n", " (relative_attention_bias): Embedding(32, 16)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " (1-23): 23 x T5Block(\n", " (layer): ModuleList(\n", " (0): T5LayerSelfAttention(\n", " (SelfAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (1): T5LayerCrossAttention(\n", " (EncDecAttention): T5Attention(\n", " (q): Linear(in_features=1024, out_features=1024, bias=False)\n", " (k): Linear(in_features=1024, out_features=1024, bias=False)\n", " (v): Linear(in_features=1024, out_features=1024, bias=False)\n", " (o): Linear(in_features=1024, out_features=1024, bias=False)\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (2): T5LayerFF(\n", " (DenseReluDense): T5DenseActDense(\n", " (wi): 
Linear(in_features=1024, out_features=4096, bias=False)\n", " (wo): Linear(in_features=4096, out_features=1024, bias=False)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " (act): ReLU()\n", " )\n", " (layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (final_layer_norm): T5LayerNorm()\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (lm_head): Linear(in_features=1024, out_features=32128, bias=False)\n", " )\n", " (prompt_encoder): ModuleDict(\n", " (default): PromptEmbedding(\n", " (embedding): Embedding(40, 1024)\n", " )\n", " )\n", " (word_embeddings): Embedding(32128, 1024)\n", ")" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build the prompt-tuning configuration, then wrap the pretrained seq2seq model with it\n", "peft_config = PromptTuningConfig(\n", "    task_type=TaskType.SEQ_2_SEQ_LM,\n", "    # TEXT init: the prompt is seeded from `prompt_tuning_init_text` below\n", "    prompt_tuning_init=PromptTuningInit.TEXT,\n", "    num_virtual_tokens=20,\n", "    prompt_tuning_init_text=\"What is the sentiment of this article?\\n\",\n", "    inference_mode=False,\n", "    # honour the dedicated tokenizer setting from the configuration cell\n", "    tokenizer_name_or_path=tokenizer_name_or_path,\n", ")\n", "\n", "model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)\n", "model = get_peft_model(model, peft_config)\n", "# prints the trainable vs. total parameter counts\n", "model.print_trainable_parameters()\n", "model" ] }, { "cell_type": "code", "execution_count": 3, "id": "4ee2babf", "metadata": { "ExecuteTime": { "end_time": "2023-05-30T08:38:18.759143Z", "start_time": "2023-05-30T08:38:17.881621Z" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using the latest cached version of the dataset since financial_phrasebank couldn't be found on the Hugging Face Hub\n", "Found the latest cached dataset configuration 'sentences_allagree' at /root/.cache/huggingface/datasets/financial_phrasebank/sentences_allagree/1.0.0/550bde12e6c30e2674da973a55f57edde5181d53f5a5a34c1531c53f93b7e141 (last modified on Thu Jul 31 06:37:38 2025).\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": 
"2b64258700bd40548ddcd626f3920c9a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/2037 [00:00