{"cells":[{"cell_type":"code","execution_count":30,"id":"63eac0a4","metadata":{"id":"63eac0a4","executionInfo":{"status":"ok","timestamp":1736407450159,"user_tz":-360,"elapsed":322,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}}},"outputs":[],"source":["from bs4 import BeautifulSoup\n","import requests\n","import pandas as pd\n","import numpy as np"]},{"cell_type":"code","execution_count":31,"id":"8ca27ab4","metadata":{"id":"8ca27ab4","executionInfo":{"status":"ok","timestamp":1736407450513,"user_tz":-360,"elapsed":2,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}}},"outputs":[],"source":["# Function to extract Product Title\n","def get_title(soup):\n","\n"," try:\n"," # Outer Tag Object\n"," title_string = soup.find(\"span\", attrs={\"id\":'productTitle'}).text.strip()\n","\n"," except AttributeError:\n"," title_string = \"\"\n","\n"," return title_string\n","\n","# Function to extract Product Price\n","def get_price(soup):\n","\n"," try:\n"," price = soup.find(\"span\", attrs={'class':'a-price aok-align-center reinventPricePriceToPayMargin priceToPay'}).text.strip()\n","\n"," except AttributeError:\n"," price = \"\"\n","\n"," return price\n","\n","# Function to extract Product Rating\n","def get_rating(soup):\n","\n"," try:\n"," rating = soup.find(\"i\", attrs={'class':'a-icon a-icon-star a-star-4-5 cm-cr-review-stars-spacing-big'}).text.strip()\n","\n"," except AttributeError:\n"," try:\n"," rating = soup.find(\"span\", attrs={'class':'a-icon-alt'}).text.strip()\n"," except:\n"," rating = \"\"\n","\n"," return rating\n","\n","# Function to extract Number of User Reviews\n","def get_review_count(soup):\n"," try:\n"," review_count = soup.find(\"span\", attrs={'id':'acrCustomerReviewText'}).text.strip()\n","\n"," except AttributeError:\n"," review_count = \"\"\n","\n"," return review_count\n","\n","# Function to extract Availability Status\n","def get_availability(soup):\n"," try:\n"," available = soup.find(\"span\", attrs={'class':'a-size-medium a-color-success'}).text.strip()\n","\n"," except AttributeError:\n"," try:\n"," available = soup.find(\"span\", attrs={'class':'a-size-base a-color-price a-text-bold'}).text.strip()\n"," except AttributeError:\n"," available = \"Not Available\"\n","\n"," return available"]},{"cell_type":"code","execution_count":32,"id":"4713b996","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"4713b996","executionInfo":{"status":"ok","timestamp":1736407463579,"user_tz":-360,"elapsed":13067,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"d67af827-0090-4bd6-84a3-6e404e4df1c0"},"outputs":[{"output_type":"stream","name":"stdout","text":["webpage: \n"]}],"source":["if __name__ == '__main__':\n"," # add your user agent\n"," HEADERS = {'User-Agent': '', 'Accept-Language': 'en-US, en;q=0.5'}\n","\n"," # The webpage URL\n"," URL = \"https://www.amazon.com/s?k=laptop&crid=O6A9159L6OOM&sprefix=lap%2Caps%2C300&ref=nb_sb_ss_ts-doa-p_1_3\"\n","\n"," # HTTP Request\n"," webpage = requests.get(URL, headers=HEADERS)\n"," print(\"webpage: \",webpage)\n","\n"," # Soup Object containing all data\n"," soup = BeautifulSoup(webpage.content, \"html.parser\") # Convert Byte to Html Formate\n","\n"," # Fetch links as List of Tag Objects\n"," links = soup.find_all(\"a\", attrs={'class':'a-link-normal s-no-outline'})\n"," # print(\"links: \",links)\n","\n"," # Store the links\n"," links_list = []\n","\n"," # Loop for extracting links from Tag Objects\n"," for link in links:\n"," links_list.append(link.get('href'))\n","\n"," d = {\"title\":[], \"price\":[], \"rating\":[], \"reviews\":[],\"availability\":[]}\n","\n"," # Loop for extracting product details from each link\n"," for link in links_list:\n"," new_webpage = requests.get(\"https://www.amazon.com\" + link, headers=HEADERS)\n","\n"," new_soup = BeautifulSoup(new_webpage.content, \"html.parser\")\n","\n"," # Function calls to display all necessary product information\n"," d['title'].append(get_title(new_soup))\n"," d['price'].append(get_price(new_soup))\n"," d['rating'].append(get_rating(new_soup))\n"," d['reviews'].append(get_review_count(new_soup))\n"," d['availability'].append(get_availability(new_soup))\n","\n","\n"," amazon_df = pd.DataFrame.from_dict(d)\n"," amazon_df.to_csv(\"amazon_data.csv\", header=True, index=False)"]},{"cell_type":"code","execution_count":33,"id":"db08fbaf","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":551},"id":"db08fbaf","executionInfo":{"status":"ok","timestamp":1736407463579,"user_tz":-360,"elapsed":5,"user":{"displayName":"44-271-Munsi Walid Al Hassan Nizhu","userId":"16216461530557409787"}},"outputId":"c6bc2f33-7779-434f-cae2-c640ea2b3789"},"outputs":[{"output_type":"execute_result","data":{"text/plain":[" title price \\\n","0 HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 6... \n","1 \n","2 \n","3 \n","4 Lenovo Essential IdeaPad • 36GB RAM • 1.5TB St... \n","5 \n","6 ACEMAGIC Laptop,15.6In Windows 11 Laptop Compu... \n","7 \n","8 HP 15.6\" Laptop, 32GB RAM 1TB SSD | FHD Busine... \n","9 \n","10 \n","11 ASUS Chromebook CM14 Laptop, 14\" HD Anti-Glare... \n","12 HP Newest 14\" LED Business Laptop Computer, 16... \n","13 ApoloSign 15.6\" Full HD Laptop, 12GB RAM, 512G... \n","14 HP 14\" Laptop, 8GB DDR4 RAM, 64GB eMMC, Silver... \n","15 HP 17 Laptop, 17.3\" HD+ Touchscreen Display, 1... $903.99 \n","\n"," rating reviews availability \n","0 4.0 out of 5 stars (2,058) Not Available \n","1 Not Available \n","2 Not Available \n","3 Not Available \n","4 4.2 out of 5 stars (56) Not Available \n","5 Not Available \n","6 4.3 out of 5 stars (14) Not Available \n","7 Not Available \n","8 4.8 out of 5 stars (7) Not Available \n","9 Not Available \n","10 Not Available \n","11 4.2 out of 5 stars (1,169) Not Available \n","12 4.1 out of 5 stars (93) Not Available \n","13 5.0 out of 5 stars (8) Not Available \n","14 4.0 out of 5 stars (1,908) Not Available \n","15 4.2 out of 5 stars 453 ratings In Stock "],"text/html":["\n","
\n","
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
titlepriceratingreviewsavailability
0HP 14 Laptop, Intel Celeron N4020, 4 GB RAM, 6...4.0 out of 5 stars(2,058)Not Available
1Not Available
2Not Available
3Not Available
4Lenovo Essential IdeaPad • 36GB RAM • 1.5TB St...4.2 out of 5 stars(56)Not Available
5Not Available
6ACEMAGIC Laptop,15.6In Windows 11 Laptop Compu...4.3 out of 5 stars(14)Not Available
7Not Available
8HP 15.6\" Laptop, 32GB RAM 1TB SSD | FHD Busine...4.8 out of 5 stars(7)Not Available
9Not Available
10Not Available
11ASUS Chromebook CM14 Laptop, 14\" HD Anti-Glare...4.2 out of 5 stars(1,169)Not Available
12HP Newest 14\" LED Business Laptop Computer, 16...4.1 out of 5 stars(93)Not Available
13ApoloSign 15.6\" Full HD Laptop, 12GB RAM, 512G...5.0 out of 5 stars(8)Not Available
14HP 14\" Laptop, 8GB DDR4 RAM, 64GB eMMC, Silver...4.0 out of 5 stars(1,908)Not Available
15HP 17 Laptop, 17.3\" HD+ Touchscreen Display, 1...$903.994.2 out of 5 stars453 ratingsIn Stock
\n","
\n","
\n","\n","
\n"," \n","\n"," \n","\n"," \n","
\n","\n","\n","
\n"," \n","\n","\n","\n"," \n","
\n","\n","
\n"," \n"," \n"," \n","
\n","\n","
\n","
\n"],"application/vnd.google.colaboratory.intrinsic+json":{"type":"dataframe","variable_name":"amazon_df","summary":"{\n \"name\": \"amazon_df\",\n \"rows\": 16,\n \"fields\": [\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"HP 14\\\" Laptop, 8GB DDR4 RAM, 64GB eMMC, Silver | Portable, Student and Business, HD Display, Intel Quad-Core N4120, 1 Year Office 365, Webcam, RJ-45, HDMI, Wi-Fi, Windows 11 Home\",\n \"\",\n \"ASUS Chromebook CM14 Laptop, 14\\\" HD Anti-Glare Display (1366x768), MediaTek Kompanio 520, 4GB RAM, 64GB eMMC, ChromeOS, Gray, CM1402CM2A-DS44, Gravity Grey\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"$903.99\",\n \"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"4.0 out of 5 stars\",\n \"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"reviews\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"(1,908)\",\n \"\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"availability\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"In Stock\",\n \"Not Available\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"}},"metadata":{},"execution_count":33}],"source":["amazon_df"]}],"metadata":{"kernelspec":{"display_name":"pp","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.12.7"},"colab":{"provenance":[]}},"nbformat":4,"nbformat_minor":5}