{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] } ], "source": [ "import transformers\n", "\n", "from transformers import pipeline\n", "\n", "checkpoint = 'bert-base-uncased'\n", "classifier = pipeline('sentiment-analysis', model=checkpoint)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'label': 'LABEL_1', 'score': 0.5578101277351379}]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "classifier('This is a test sentence')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n" ] }, { "data": { "text/plain": [ "[{'generated_text': 'In this course, I will teach you how to make a really big use of the language you\\u202are learning to use.\\u202a'},\n", " {'generated_text': 'In this course, I will teach you how to manipulate sound design to better enhance your sound design while also illustrating the application of certain audio and video technologies. 
In this section I will introduce some examples of how to manipulate sound design in my introductory video.'}]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "generator = pipeline('text-generation', model='distilgpt2')\n", "generator('In this course, I will teach you how to',\n", " max_length=50, num_return_sequences = 2\n", " )" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.weight', 'classifier.bias']\n", "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n" ] }, { "data": { "text/plain": [ "[{'label': 'LABEL_0', 'score': 0.6686140894889832}]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# inside the pipeline function;\n", "\n", "from transformers import pipeline\n", "classifier = pipeline('sentiment-analysis', model=checkpoint)\n", "classifier('I am very sad')\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Tokenization: Raw text -> Tokenizer -> Tokenized Text -> Input IDs for model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "raw_inputs = ['This is a course on huggingface',\n", " 'I am very disgusted at my stupidity']\n", "\n", "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input_ids': tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 
102],\n", " [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 1]])}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Encoding(num_tokens=9, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs[0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 101, 2023, 2003, 1037, 2607, 2006, 17662, 12172, 102],\n", " [ 101, 1045, 2572, 2200, 17733, 2012, 2026, 28072, 102]])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs['input_ids']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 1]])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs['attention_mask']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "from transformers import AutoTokenizer\n", "\n", "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "raw_inputs = ['This is very good','I am learning slowly.. 
sad']\n", "\n", "inputs=tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n", " [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 1]])}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0, 0],\n", " [ 101, 1045, 2572, 4083, 3254, 1012, 1012, 6517, 102]])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs['input_ids']" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0],\n", " [1, 1, 1, 1, 1, 1, 1, 1, 1]])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "inputs['attention_mask']" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([2, 9, 768])\n" ] } ], "source": [ "from transformers import AutoModel\n", "\n", "model = AutoModel.from_pretrained(checkpoint)\n", "outputs = model(**inputs)\n", "print(outputs.last_hidden_state.shape)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'input_ids': tensor([[ 101, 2023, 2003, 2200, 2204, 102, 0, 0],\n", " [ 101, 1045, 2572, 2667, 2000, 4553, 2242, 102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0],\n", " [1, 1, 1, 1, 1, 1, 1, 1]])}\n", "torch.Size([2, 8, 768])\n", "torch.Size([2, 2])\n", "SequenceClassifierOutput(loss=None, logits=tensor([[-4.1928, 
4.5727],\n", " [ 1.9190, -1.6084]], grad_fn=), hidden_states=None, attentions=None)\n", "tensor([[-4.1928, 4.5727],\n", " [ 1.9190, -1.6084]], grad_fn=)\n", "tensor([[1.5600e-04, 9.9984e-01],\n", " [9.7146e-01, 2.8543e-02]], grad_fn=)\n", "{0: 'NEGATIVE', 1: 'POSITIVE'}\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "checkpoint = 'distilbert-base-uncased-finetuned-sst-2-english'\n", "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n", "\n", "raw_inputs = ['This is very good', 'I am trying to learn something']\n", "\n", "inputs = tokenizer(raw_inputs, padding=True, truncation=True, return_tensors='pt')\n", "\n", "print(inputs)\n", "\n", "from transformers import AutoModel\n", "\n", "model = AutoModel.from_pretrained(checkpoint)\n", "outputs = model(**inputs)\n", "print(outputs.last_hidden_state.shape)\n", "\n", "from transformers import AutoModelForSequenceClassification\n", "\n", "model = AutoModelForSequenceClassification.from_pretrained(checkpoint)\n", "outputs = model(**inputs)\n", "print(outputs.logits.shape)\n", "print(outputs)\n", "print(outputs.logits)\n", "\n", "import torch\n", "\n", "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n", "print(predictions)\n", "\n", "print(model.config.id2label)\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n" ] } ], "source": [ "# instantiate a transformers model:\n", "\n", "from transformers import AutoModel\n", "\n", "bert_model = AutoModel.from_pretrained('bert-base-uncased')\n", "print(type(bert_model))\n", "\n", "gpt_model = AutoModel.from_pretrained('gpt2')\n", "print(type(gpt_model))\n", "\n", "bart_model = AutoModel.from_pretrained('facebook/bart-base')\n", "print(type(bart_model))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n" ] } ], "source": [ "from 
transformers import AutoConfig\n", "\n", "bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n", "print(type(bert_config))\n", "\n", "gpt_config = AutoConfig.from_pretrained('gpt2')\n", "print(type(gpt_config))\n", "\n", "bart_config = AutoConfig.from_pretrained('facebook/bart-base')\n", "print(type(bart_config))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n" ] } ], "source": [ "from transformers import BertConfig\n", "\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "print(type(bert_config))\n", "\n", "from transformers import GPT2Config\n", "gpt_config = GPT2Config.from_pretrained('gpt2')\n", "print(type(gpt_config))\n", "\n", "from transformers import BartConfig\n", "bart_config = BartConfig.from_pretrained('facebook/bart-base')\n", "print(type(bart_config))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BertConfig {\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_type\": \"bert\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.34.1\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " \"vocab_size\": 30522\n", "}\n", "\n" ] } ], "source": [ "from transformers import BertConfig\n", "\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "print(bert_config)" ] }, { "cell_type": "code", 
"execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BertModel(\n", " (embeddings): BertEmbeddings(\n", " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n", " (position_embeddings): Embedding(512, 768)\n", " (token_type_embeddings): Embedding(2, 768)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (encoder): BertEncoder(\n", " (layer): ModuleList(\n", " (0-11): 12 x BertLayer(\n", " (attention): BertAttention(\n", " (self): BertSelfAttention(\n", " (query): Linear(in_features=768, out_features=768, bias=True)\n", " (key): Linear(in_features=768, out_features=768, bias=True)\n", " (value): Linear(in_features=768, out_features=768, bias=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (output): BertSelfOutput(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " (intermediate): BertIntermediate(\n", " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", " (intermediate_act_fn): GELUActivation()\n", " )\n", " (output): BertOutput(\n", " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (pooler): BertPooler(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (activation): Tanh()\n", " )\n", ")\n" ] } ], "source": [ "from transformers import BertConfig, BertModel\n", "\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "bert_model = BertModel(bert_config)\n", "print(bert_model)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "from transformers import BertConfig, BertModel\n", "\n", 
"bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n", "bert_model = BertModel(bert_config)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BertModel(\n", " (embeddings): BertEmbeddings(\n", " (word_embeddings): Embedding(30522, 768, padding_idx=0)\n", " (position_embeddings): Embedding(512, 768)\n", " (token_type_embeddings): Embedding(2, 768)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (encoder): BertEncoder(\n", " (layer): ModuleList(\n", " (0-9): 10 x BertLayer(\n", " (attention): BertAttention(\n", " (self): BertSelfAttention(\n", " (query): Linear(in_features=768, out_features=768, bias=True)\n", " (key): Linear(in_features=768, out_features=768, bias=True)\n", " (value): Linear(in_features=768, out_features=768, bias=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " (output): BertSelfOutput(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " (intermediate): BertIntermediate(\n", " (dense): Linear(in_features=768, out_features=3072, bias=True)\n", " (intermediate_act_fn): GELUActivation()\n", " )\n", " (output): BertOutput(\n", " (dense): Linear(in_features=3072, out_features=768, bias=True)\n", " (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n", " (dropout): Dropout(p=0.1, inplace=False)\n", " )\n", " )\n", " )\n", " )\n", " (pooler): BertPooler(\n", " (dense): Linear(in_features=768, out_features=768, bias=True)\n", " (activation): Tanh()\n", " )\n", ")\n" ] } ], "source": [ "print(bert_model)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "from transformers import BertConfig, BertModel\n", "\n", 
"bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers = 10)\n", "bert_model = BertModel(bert_config)\n", "\n", "bert_model.save_pretrained('my_bert_model')" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from transformers import BertModel\n", "\n", "bert_model = BertModel.from_pretrained('my_bert_model')" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "\n", "\n", "\n" ] } ], "source": [ "from transformers import AutoModel\n", "\n", "bert_model = AutoModel.from_pretrained('bert-base-uncased')\n", "print(type(bert_model))\n", "\n", "from transformers import AutoConfig, BertModel\n", "\n", "bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n", "print(type(bert_config))\n", "bert_model = BertModel(bert_config)\n", "print(type(bert_model))\n", "\n", "from transformers import BertConfig, BertModel\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "print(type(bert_config))\n", "bert_model = BertModel(bert_config)\n", "print(type(bert_model))" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "BertConfig {\n", " \"architectures\": [\n", " \"BertForMaskedLM\"\n", " ],\n", " \"attention_probs_dropout_prob\": 0.1,\n", " \"classifier_dropout\": null,\n", " \"gradient_checkpointing\": false,\n", " \"hidden_act\": \"gelu\",\n", " \"hidden_dropout_prob\": 0.1,\n", " \"hidden_size\": 768,\n", " \"initializer_range\": 0.02,\n", " \"intermediate_size\": 3072,\n", " \"layer_norm_eps\": 1e-12,\n", " \"max_position_embeddings\": 512,\n", " \"model_type\": \"bert\",\n", " \"num_attention_heads\": 12,\n", " \"num_hidden_layers\": 12,\n", " \"pad_token_id\": 0,\n", " \"position_embedding_type\": \"absolute\",\n", " \"transformers_version\": \"4.34.1\",\n", " \"type_vocab_size\": 2,\n", " \"use_cache\": true,\n", " 
\"vocab_size\": 30522\n", "}" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from transformers import BertConfig\n", "\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "bert_config" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "from transformers import BertConfig, BertModel\n", "new_bert_config = BertConfig.from_pretrained('bert-base-uncased', num_hidden_layers=10)\n", "new_bert_model = BertModel(new_bert_config)\n", "\n", "new_bert_model.save_pretrained('new-bert-model')\n", "\n" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "load_new_bert_model = BertModel.from_pretrained('new-bert-model')\n" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['let', \"'\", 's', 'try', 'to', 'token', '##ize']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "You're using a BertTokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "['▁let', \"'\", 's', '▁try', '▁to', '▁to', 'ken', 'ize']\n", "[2292, 1005, 1055, 3046, 2000, 19204, 4697]\n", "[101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102]\n", "{'input_ids': [101, 2292, 1005, 1055, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}\n", "[CLS] let's try to tokenize [SEP]\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "# split our input into tokens:\n", "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n", "tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n", "print(tokens)\n", "\n", "albert_tokenizer = AutoTokenizer.from_pretrained('albert-base-v1')\n", "albert_tokens = albert_tokenizer.tokenize(\"Let's try to tokenize\")\n", "print(albert_tokens)\n", "\n", "# map tokens to respective ids:\n", "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n", "print(input_ids)\n", "\n", "# add special tokens:\n", "final_inputs = tokenizer.prepare_for_model(input_ids)\n", "print(final_inputs['input_ids'])\n", "print(final_inputs)\n", "\n", "print(tokenizer.decode(final_inputs['input_ids']))" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You're using a BertTokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "['lets', 'try', 'to', 'token', '##ize']\n", "[11082, 3046, 2000, 19204, 4697]\n", "{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n", "[CLS] lets try to tokenize [SEP]\n", "{'input_ids': [101, 11082, 3046, 2000, 19204, 4697, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1]}\n", "[101, 11082, 3046, 2000, 19204, 4697, 102]\n", "[CLS] lets try to tokenize [SEP]\n" ] } ], "source": [ "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n", "tokens = tokenizer.tokenize(\"lets try to tokenize\")\n", "print(tokens)\n", "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n", "print(input_ids)\n", "final_inputs = tokenizer.prepare_for_model(input_ids)\n", "print(final_inputs)\n", "decoded_inputs = tokenizer.decode(final_inputs['input_ids'])\n", "print(decoded_inputs)\n", "\n", "from transformers import AutoTokenizer\n", "\n", "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n", "inputs = tokenizer('lets try to tokenize')\n", "print(inputs)\n", "print(inputs['input_ids'])\n", "print(tokenizer.decode(inputs['input_ids']))" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You're using a BertTokenizerFast tokenizer. 
Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.\n" ] }, { "ename": "AttributeError", "evalue": "'list' object has no attribute 'size'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32mc:\\Users\\HP\\Desktop\\PythonProjects\\HuggingFace_Beginners\\practise basics.ipynb Cell 32\u001b[0m line \u001b[0;36m1\n\u001b[0;32m 9\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m BertModel\n\u001b[0;32m 10\u001b[0m Bert_model \u001b[39m=\u001b[39m BertModel(Bert_config)\n\u001b[1;32m---> 11\u001b[0m outputs \u001b[39m=\u001b[39m Bert_model(\u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49minputs)\n\u001b[0;32m 12\u001b[0m \u001b[39mprint\u001b[39m(outputs\u001b[39m.\u001b[39mlast_hidden_state\u001b[39m.\u001b[39mshape)\n\u001b[0;32m 13\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m AutoModelForSequenceClassification\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1518\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1516\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_compiled_call_impl(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m# type: ignore[misc]\u001b[39;00m\n\u001b[0;32m 1517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m-> 1518\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_call_impl(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n", "File 
\u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\torch\\nn\\modules\\module.py:1527\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1522\u001b[0m \u001b[39m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[0;32m 1523\u001b[0m \u001b[39m# this function, and just call forward.\u001b[39;00m\n\u001b[0;32m 1524\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m (\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_backward_pre_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_hooks \u001b[39mor\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward_pre_hooks\n\u001b[0;32m 1525\u001b[0m \u001b[39mor\u001b[39;00m _global_backward_pre_hooks \u001b[39mor\u001b[39;00m _global_backward_hooks\n\u001b[0;32m 1526\u001b[0m \u001b[39mor\u001b[39;00m _global_forward_hooks \u001b[39mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[1;32m-> 1527\u001b[0m \u001b[39mreturn\u001b[39;00m forward_call(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[0;32m 1529\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m 1530\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", "File \u001b[1;32m~\\AppData\\Roaming\\Python\\Python311\\site-packages\\transformers\\models\\bert\\modeling_bert.py:970\u001b[0m, in \u001b[0;36mBertModel.forward\u001b[1;34m(self, input_ids, attention_mask, token_type_ids, position_ids, head_mask, inputs_embeds, encoder_hidden_states, encoder_attention_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[0;32m 968\u001b[0m \u001b[39melif\u001b[39;00m input_ids \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 969\u001b[0m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwarn_if_padding_and_no_attention_mask(input_ids, attention_mask)\n\u001b[1;32m--> 970\u001b[0m input_shape \u001b[39m=\u001b[39m input_ids\u001b[39m.\u001b[39;49msize()\n\u001b[0;32m 971\u001b[0m \u001b[39melif\u001b[39;00m inputs_embeds \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m 972\u001b[0m input_shape \u001b[39m=\u001b[39m inputs_embeds\u001b[39m.\u001b[39msize()[:\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n", "\u001b[1;31mAttributeError\u001b[0m: 'list' object has no attribute 'size'" ] } ], "source": [ "from transformers import pipeline\n", "from transformers import AutoTokenizer, AutoModel, AutoModelForSequenceClassification, AutoConfig\n", "\n", "tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')\n", "tokens = tokenizer.tokenize(\"Let's try to tokenize\")\n", "input_ids = tokenizer.convert_tokens_to_ids(tokens)\n", "inputs = tokenizer.prepare_for_model(input_ids, return_tensors='pt')\n", "Bert_config = AutoConfig.from_pretrained('bert-base-uncased')\n", "from transformers import BertModel\n", "Bert_model = BertModel(Bert_config)\n", "outputs = Bert_model(**inputs)\n", "print(outputs.last_hidden_state.shape)\n", "from transformers import AutoModelForSequenceClassification\n", "model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')\n", "outputs = model(**inputs)\n", "print(outputs.logits)\n", "import torch\n", "predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)\n", "print(predictions)\n", "print(model.config.id2label)\n", "\n", "from transformers import AutoModel, AutoConfig, BertModel, BertConfig\n", "\n", "bert_config = BertConfig.from_pretrained('bert-base-uncased')\n", "bert_model = BertModel(bert_config)\n", "bert_model.save_pretrained('bert_model')\n", "initialize_model = BertModel.from_pretrained('bert_model')\n", "outputs = initialize_model(**inputs)\n", "print(outputs.last_hidden_state.shape)" ] }, { "cell_type": "code",
"execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 2 }