File size: 21,869 Bytes
5ab8a9f
1
{"cells":[{"cell_type":"markdown","source":["# `Web Scraping`\n"],"metadata":{"id":"wNfH3-kA5Usr"}},{"cell_type":"markdown","source":["# Introduction to web scraping with BeautifulSoup\n","* Web scraping involves extracting data from websites.\n","* Useful for gathering information easily.\n","* BeautifulSoup, from the bs4 package, is a popular Python library for web scraping.\n","* Simplifies parsing of HTML and XML documents to extract desired information.\n","\n","* Reference: [brightdata](https://brightdata.com/blog/how-tos/beautiful-soup-web-scraping#:~:text=Web%20Scraping%20with%20Beautiful%20Soup&text=The%20library%20automatically%20selects%20the,fast%20and%20efficient%20lxml%20parser.)\n","* User Agent: [whatismybrowser](https://www.whatismybrowser.com/detect/what-is-my-user-agent/)"],"metadata":{"id":"pp3qeU7J5Bqb"}},{"cell_type":"markdown","source":["Installation and Declaration"],"metadata":{"id":"-7U3woPO6i-5"}},{"cell_type":"code","source":["# %pip install beautifulsoup4 requests"],"metadata":{"id":"IWesAnWQ6XZH"},"execution_count":null,"outputs":[]},{"cell_type":"markdown","source":["Fetching Web Content"],"metadata":{"id":"sNfUIUU17PGr"}},{"cell_type":"code","source":["# Fetching Web Content\n","import requests\n","url = \"https://walid.vercel.app\"\n","headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36\"}\n","# Pass headers by keyword: the second positional argument of requests.get is `params`,\n","# so requests.get(url, headers) would put the User-Agent in the query string instead of the request headers.\n","response = requests.get(url, headers=headers, timeout=10)\n","response.raise_for_status()  # fail fast on 4xx/5xx instead of parsing an error page\n","print(response)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ZtyuOw5u60XX","executionInfo":{"status":"ok","timestamp":1734172922647,"user_tz":-360,"elapsed":3487,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"7110f21e-24c3-4abe-97cd-a23de2776505"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["<Response [200]>\n"]}]},{"cell_type":"code","source":["# Parsing the HTML\n","from bs4 import 
BeautifulSoup\n","soup = BeautifulSoup(response.content, \"html.parser\")"],"metadata":{"id":"Nm_Sd86T7AOW","executionInfo":{"status":"ok","timestamp":1734173000239,"user_tz":-360,"elapsed":347,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}}},"execution_count":2,"outputs":[]},{"cell_type":"markdown","source":["Basic Operations with BeautifulSoup"],"metadata":{"id":"UXxbjr0l8Bu6"}},{"cell_type":"code","source":["# Find tag name\n","title = soup.find(\"title\")\n","print(title)\n","print(title.text)\n","print(title.contents)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"yxzR1JJ-7bwe","executionInfo":{"status":"ok","timestamp":1734173006245,"user_tz":-360,"elapsed":349,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"71c27510-0fc5-4418-c152-e7d5d2d51c40"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["<title>Walid</title>\n","Walid\n","['Walid']\n"]}]},{"cell_type":"code","source":["# All occurrences of a tag\n","links = soup.find_all(\"a\")"],"metadata":{"id":"VR7-pWDE7bs-","executionInfo":{"status":"ok","timestamp":1734173054542,"user_tz":-360,"elapsed":455,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}}},"execution_count":4,"outputs":[]},{"cell_type":"markdown","source":["**Practical exercises**"],"metadata":{"id":"1YOwsGSk8z63"}},{"cell_type":"code","source":["import re\n","\n","# Absolute https URL. The host is one or more dot-separated labels followed by a TLD, so\n","# multi-label hosts such as walid.vercel.app are matched in full instead of being cut at the second dot.\n","URL_PATTERN = re.compile(r'https://(?:[a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,}(?:/[a-zA-Z0-9._~:/?#@!$&\\'()*+,;=%-]*)?')\n","\n","# One entry per <a> tag: the list of URLs found in that tag's href (tags without a match are skipped).\n","lst = []\n","for link in links:\n","  result = URL_PATTERN.findall(link.get(\"href\") or \"\")\n","  if result:\n","    lst.append(result)\n","print(lst)\n","\n","# Flatten into a single list of URLs; extend() keeps every match as its own item\n","# (''.join would glue several matches from one tag into a single invalid string).\n","new_lst = []\n","for result in lst:\n","  new_lst.extend(result)\n","print(new_lst)"],"metadata":{"id":"uBBRevC724cZ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import requests\n","from bs4 import BeautifulSoup\n","import pandas as pd\n","\n","url = \"http://quotes.toscrape.com/\"\n","headers = {\"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36\"}\n","# headers must be passed by keyword: the second positional argument of requests.get is `params`,\n","# which would append the User-Agent to the query string instead of sending it as a request header.\n","response = requests.get(url, headers=headers, timeout=10)\n","response.raise_for_status()  # stop on 4xx/5xx rather than scraping an error page\n","print('Status: ',response)\n","soup = BeautifulSoup(response.text, \"html.parser\")\n","\n","# Quote texts and author names are collected separately and paired by position.\n","quotes = soup.find_all(\"span\", attrs={\"class\":\"text\"})\n","authors = soup.find_all(\"small\", attrs={\"class\":\"author\"})\n","\n","qu = [{\"Quote\": quote.text, \"Author\": author.text} for quote, author in zip(quotes, authors)]\n","\n","csv_file = \"Quote.csv\"\n","df = pd.DataFrame(qu)\n","df.to_csv(csv_file, index=False, encoding=\"utf-8\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"qKQp6kpX7bja","executionInfo":{"status":"ok","timestamp":1734173627017,"user_tz":-360,"elapsed":1372,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"253cf8ec-5e14-41c1-e3b9-3323ff18a9d3"},"execution_count":8,"outputs":[{"output_type":"stream","name":"stdout","text":["Status:  <Response 
[200]>\n"]}]},{"cell_type":"code","source":["df"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":363},"id":"KdYnO-sH88YW","executionInfo":{"status":"ok","timestamp":1734173631548,"user_tz":-360,"elapsed":360,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"3699902c-bb7c-4791-8e1e-402ac7aa75cc"},"execution_count":9,"outputs":[{"output_type":"execute_result","data":{"text/plain":["                                               Quote             Author\n","0  “The world as we have created it is a process ...    Albert Einstein\n","1  “It is our choices, Harry, that show what we t...       J.K. Rowling\n","2  “There are only two ways to live your life. On...    Albert Einstein\n","3  “The person, be it gentleman or lady, who has ...        Jane Austen\n","4  “Imperfection is beauty, madness is genius and...     Marilyn Monroe\n","5  “Try not to become a man of success. Rather be...    Albert Einstein\n","6  “It is better to be hated for what you are tha...         André Gide\n","7  “I have not failed. I've just found 10,000 way...   Thomas A. Edison\n","8  “A woman is like a tea bag; you never know how...  Eleanor Roosevelt\n","9  “A day without sunshine is like, you know, nig...       
Steve Martin"],"text/html":["\n","  <div id=\"df-31329006-48d2-438d-a276-b0ff4c35bf04\" class=\"colab-df-container\">\n","    <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>Quote</th>\n","      <th>Author</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>“The world as we have created it is a process ...</td>\n","      <td>Albert Einstein</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>“It is our choices, Harry, that show what we t...</td>\n","      <td>J.K. Rowling</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>“There are only two ways to live your life. On...</td>\n","      <td>Albert Einstein</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>“The person, be it gentleman or lady, who has ...</td>\n","      <td>Jane Austen</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>“Imperfection is beauty, madness is genius and...</td>\n","      <td>Marilyn Monroe</td>\n","    </tr>\n","    <tr>\n","      <th>5</th>\n","      <td>“Try not to become a man of success. Rather be...</td>\n","      <td>Albert Einstein</td>\n","    </tr>\n","    <tr>\n","      <th>6</th>\n","      <td>“It is better to be hated for what you are tha...</td>\n","      <td>André Gide</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>“I have not failed. I've just found 10,000 way...</td>\n","      <td>Thomas A. 
Edison</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>“A woman is like a tea bag; you never know how...</td>\n","      <td>Eleanor Roosevelt</td>\n","    </tr>\n","    <tr>\n","      <th>9</th>\n","      <td>“A day without sunshine is like, you know, nig...</td>\n","      <td>Steve Martin</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","    <div class=\"colab-df-buttons\">\n","\n","  <div class=\"colab-df-container\">\n","    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-31329006-48d2-438d-a276-b0ff4c35bf04')\"\n","            title=\"Convert this dataframe to an interactive table.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n","    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n","  </svg>\n","    </button>\n","\n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    .colab-df-buttons div {\n","      margin-bottom: 4px;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: 
drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","    <script>\n","      const buttonEl =\n","        document.querySelector('#df-31329006-48d2-438d-a276-b0ff4c35bf04 button.colab-df-convert');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","      async function convertToInteractive(key) {\n","        const element = document.querySelector('#df-31329006-48d2-438d-a276-b0ff4c35bf04');\n","        const dataTable =\n","          await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                    [key], {});\n","        if (!dataTable) return;\n","\n","        const docLinkHtml = 'Like what you see? Visit the ' +\n","          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","          + ' to learn more about interactive tables.';\n","        element.innerHTML = '';\n","        dataTable['output_type'] = 'display_data';\n","        await google.colab.output.renderOutput(dataTable, element);\n","        const docLink = document.createElement('div');\n","        docLink.innerHTML = docLinkHtml;\n","        element.appendChild(docLink);\n","      }\n","    </script>\n","  </div>\n","\n","\n","<div id=\"df-208b0142-0db3-4a65-b815-3bb2f4fdca65\">\n","  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-208b0142-0db3-4a65-b815-3bb2f4fdca65')\"\n","            title=\"Suggest charts\"\n","            style=\"display:none;\">\n","\n","<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","     width=\"24px\">\n","    <g>\n","        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n","    </g>\n","</svg>\n","  </button>\n","\n","<style>\n","  .colab-df-quickchart {\n","      --bg-color: #E8F0FE;\n","      --fill-color: 
#1967D2;\n","      --hover-bg-color: #E2EBFA;\n","      --hover-fill-color: #174EA6;\n","      --disabled-fill-color: #AAA;\n","      --disabled-bg-color: #DDD;\n","  }\n","\n","  [theme=dark] .colab-df-quickchart {\n","      --bg-color: #3B4455;\n","      --fill-color: #D2E3FC;\n","      --hover-bg-color: #434B5C;\n","      --hover-fill-color: #FFFFFF;\n","      --disabled-bg-color: #3B4455;\n","      --disabled-fill-color: #666;\n","  }\n","\n","  .colab-df-quickchart {\n","    background-color: var(--bg-color);\n","    border: none;\n","    border-radius: 50%;\n","    cursor: pointer;\n","    display: none;\n","    fill: var(--fill-color);\n","    height: 32px;\n","    padding: 0;\n","    width: 32px;\n","  }\n","\n","  .colab-df-quickchart:hover {\n","    background-color: var(--hover-bg-color);\n","    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n","    fill: var(--button-hover-fill-color);\n","  }\n","\n","  .colab-df-quickchart-complete:disabled,\n","  .colab-df-quickchart-complete:disabled:hover {\n","    background-color: var(--disabled-bg-color);\n","    fill: var(--disabled-fill-color);\n","    box-shadow: none;\n","  }\n","\n","  .colab-df-spinner {\n","    border: 2px solid var(--fill-color);\n","    border-color: transparent;\n","    border-bottom-color: var(--fill-color);\n","    animation:\n","      spin 1s steps(1) infinite;\n","  }\n","\n","  @keyframes spin {\n","    0% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","      border-left-color: var(--fill-color);\n","    }\n","    20% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    30% {\n","      border-color: transparent;\n","      border-left-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","      border-right-color: var(--fill-color);\n","    }\n","    40% {\n","      border-color: 
transparent;\n","      border-right-color: var(--fill-color);\n","      border-top-color: var(--fill-color);\n","    }\n","    60% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","    }\n","    80% {\n","      border-color: transparent;\n","      border-right-color: var(--fill-color);\n","      border-bottom-color: var(--fill-color);\n","    }\n","    90% {\n","      border-color: transparent;\n","      border-bottom-color: var(--fill-color);\n","    }\n","  }\n","</style>\n","\n","  <script>\n","    async function quickchart(key) {\n","      const quickchartButtonEl =\n","        document.querySelector('#' + key + ' button');\n","      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n","      quickchartButtonEl.classList.add('colab-df-spinner');\n","      try {\n","        const charts = await google.colab.kernel.invokeFunction(\n","            'suggestCharts', [key], {});\n","      } catch (error) {\n","        console.error('Error during call to suggestCharts:', error);\n","      }\n","      quickchartButtonEl.classList.remove('colab-df-spinner');\n","      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n","    }\n","    (() => {\n","      let quickchartButtonEl =\n","        document.querySelector('#df-208b0142-0db3-4a65-b815-3bb2f4fdca65 button');\n","      quickchartButtonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","    })();\n","  </script>\n","</div>\n","\n","  <div id=\"id_8b8b7829-b0f2-42f9-b61c-b7b8bc1629de\">\n","    <style>\n","      .colab-df-generate {\n","        background-color: #E8F0FE;\n","        border: none;\n","        border-radius: 50%;\n","        cursor: pointer;\n","        display: none;\n","        fill: #1967D2;\n","        height: 32px;\n","        padding: 0 0 0 0;\n","        width: 32px;\n","      }\n","\n","      .colab-df-generate:hover {\n","        background-color: #E2EBFA;\n","        box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","        fill: #174EA6;\n","      }\n","\n","      [theme=dark] .colab-df-generate {\n","        background-color: #3B4455;\n","        fill: #D2E3FC;\n","      }\n","\n","      [theme=dark] .colab-df-generate:hover {\n","        background-color: #434B5C;\n","        box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","        filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","        fill: #FFFFFF;\n","      }\n","    </style>\n","    <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df')\"\n","            title=\"Generate code using this dataframe.\"\n","            style=\"display:none;\">\n","\n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n","  </svg>\n","    </button>\n","    <script>\n","      (() => {\n","      const buttonEl =\n","        document.querySelector('#id_8b8b7829-b0f2-42f9-b61c-b7b8bc1629de button.colab-df-generate');\n","      buttonEl.style.display =\n","        google.colab.kernel.accessAllowed ? 
'block' : 'none';\n","\n","      buttonEl.onclick = () => {\n","        google.colab.notebook.generateWithVariable('df');\n","      }\n","      })();\n","    </script>\n","  </div>\n","\n","    </div>\n","  </div>\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"df","summary":"{\n  \"name\": \"df\",\n  \"rows\": 10,\n  \"fields\": [\n    {\n      \"column\": \"Quote\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 10,\n        \"samples\": [\n          \"\\u201cA woman is like a tea bag; you never know how strong it is until it's in hot water.\\u201d\",\n          \"\\u201cIt is our choices, Harry, that show what we truly are, far more than our abilities.\\u201d\",\n          \"\\u201cTry not to become a man of success. Rather become a man of value.\\u201d\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Author\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 8,\n        \"samples\": [\n          \"J.K. Rowling\",\n          \"Thomas A. Edison\",\n          \"Albert Einstein\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"}},"metadata":{},"execution_count":9}]}],"metadata":{"kernelspec":{"display_name":"pp","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.12.7"},"colab":{"provenance":[],"collapsed_sections":["pp3qeU7J5Bqb"]}},"nbformat":4,"nbformat_minor":0}