Commit 39a482a · Linh Vuu committed
Parent(s): a088ba6

added files

Files changed:
- README copy.md +12 -0
- app.py +85 -0
- requirements.txt +4 -0
- scraper_lazada.py +208 -0
- scraper_shopee.py +229 -0
- scraper_tiki.py +283 -0
README copy.md
ADDED
@@ -0,0 +1,12 @@
---
title: PriceComparison
emoji: 👀
colorFrom: pink
colorTo: yellow
sdk: streamlit
sdk_version: 1.33.0
app_file: app.py
pinned: false
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
ADDED
@@ -0,0 +1,85 @@
from scraper_tiki import *
from scraper_lazada import *
from scraper_shopee import *
import pandas as pd
import streamlit as st

# # Test Tiki
# start_driver()
# DRIVER.get('https://tiki.vn/search?sort=price%2Casc&q=megaduo')
# time.sleep(3)
# products = DRIVER.find_elements(By.CLASS_NAME, 'product-item')
# product = products[2]
# info = get_tiki_product_info_single(product, True)
# print(info)

# # Test Lazada
# start_driver()
# DRIVER.get('https://www.lazada.vn/catalog/?page=1&q=megaduo&sort=priceasc')
# time.sleep(3)
# products = DRIVER.find_elements(By.CLASS_NAME, 'Bm3ON')
# product = products[2]
# info = get_lazada_product_info_single(product, True)
# print(info)

def main():

    st.subheader("Price Comparison (So Sánh Giá)")

    with st.form(key="user_input_form"):
        search_product = st.text_input("What would you like to buy? (Bạn muốn mua gì?)")
        submit_button = st.form_submit_button(label="Search")

    if submit_button:
        print('Scraping', search_product)
        # search_product = "megaduo"
        # search_product = input("Search for what? ")
        num_max_page = 1
        extra_info = True
        n_products_to_view = 5  # Change this as you like to check more products
        col_to_display = ['name', 'price', 'product_url', 'image']

        st.subheader("Shopee")
        shopee_data = scrap_shopee(search_product, num_max_page, extra_info)
        if shopee_data:
            df_shopee = pd.DataFrame(data=shopee_data, columns=shopee_data[0].keys())
            print(df_shopee.head())
            st.write(df_shopee[col_to_display].sort_values(by='price').head(n_products_to_view))
        else:
            df_shopee = pd.DataFrame(columns=col_to_display)
            st.write("Not found.")

        st.subheader("Lazada")
        lazada_data = scrap_lazada(search_product, num_max_page, extra_info)
        if lazada_data:
            df_lazada = pd.DataFrame(data=lazada_data, columns=lazada_data[0].keys())
            print(df_lazada.head())
            st.write(df_lazada[col_to_display].sort_values(by='price').head(n_products_to_view))
        else:
            df_lazada = pd.DataFrame(columns=col_to_display)
            st.write("Not found.")

        st.subheader("Tiki")
        tiki_data = scrap_tiki(search_product, num_max_page, extra_info)
        if tiki_data:
            df_tiki = pd.DataFrame(data=tiki_data, columns=tiki_data[0].keys())
            print(df_tiki.head())
            st.write(df_tiki[col_to_display].sort_values(by='price').head(n_products_to_view))
        else:
            df_tiki = pd.DataFrame(columns=col_to_display)
            st.write("Not found.")

        # Merge the three dataframes
        merged_df = pd.concat([df_tiki, df_lazada, df_shopee])

        # Sort the merged dataframe by price
        sorted_merged_df = merged_df.sort_values(by='price')

        print(sorted_merged_df.head(n_products_to_view))
        st.subheader("All sites, sorted by price ascending (Sắp xếp theo giá tăng dần)")
        st.write(sorted_merged_df.head(n_products_to_view))

if __name__ == "__main__":
    main()
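A minimal way to exercise one of these scrapers outside Streamlit, assuming Chrome and a matching chromedriver are installed (scrap_tiki and the keys of the returned dictionaries come from scraper_tiki.py, added below):

from scraper_tiki import scrap_tiki

# Each element is a dict with at least: source, name, price, product_url, image
results = scrap_tiki('megaduo', num_max_page=1, extra_info=False)
for item in results[:3]:
    print(item['name'], item['price'], item['product_url'])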
requirements.txt
ADDED
@@ -0,0 +1,4 @@
selenium
pandas
streamlit==1.13.0
altair==4.1.0
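These dependencies can be installed with pip install -r requirements.txt. Selenium additionally assumes a Chrome binary and a compatible chromedriver are available at runtime, since every scraper module starts the browser via webdriver.Chrome().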
scraper_lazada.py
ADDED
@@ -0,0 +1,208 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
import time

# Global driver to use throughout the script
DRIVER = None

# Wrapper to close the driver if it has been created
def close_driver():
    global DRIVER
    if DRIVER is not None:
        DRIVER.close()
        DRIVER = None

# Function to (re)start the driver
def start_driver(force_restart=False):
    global DRIVER

    if force_restart:
        close_driver()

    # Set up the driver to run Chrome headless, i.e. in the background
    # without opening a browser window
    options = Options()
    options.add_argument('-headless')
    options.add_argument('-no-sandbox')
    options.add_argument('-disable-dev-shm-usage')

    DRIVER = webdriver.Chrome(options=options)

### Function to extract product info from the necessary html and json tags
def get_lazada_product_info_single(product_element, extra_info):
    """
    Extract info from a single product element from the driver.
    Args:
        product_element: (WebDriverElement) the product whose info needs to be
        extracted.
    Returns:
        info: (dict) a dictionary of info of the product. Every product
        should at least have four pieces of information: name, price,
        link to the product page, and link to the product image.
    """
    info = {'source': 'lazada',
            'name': '',
            'price': -1,
            'product_url': '',
            'image': ''}

    # print(product_element.get_attribute('outerHTML'))

    # name
    try:
        # Find the <a> element within the <div class="RfADt">. Note the
        # leading "." so the XPath is scoped to this product element rather
        # than the whole page.
        product_title_element = product_element.find_element(By.XPATH, ".//div[@class='RfADt']/a")

        # Get the text content of the <a> element
        info['name'] = product_title_element.text

    except NoSuchElementException:
        info['name'] = ""

    # price
    try:
        # Find the <span> element with class "ooOxS" within the <div class="aBrP0">
        price_element = product_element.find_element(By.XPATH, ".//div[@class='aBrP0']/span[@class='ooOxS']")

        # Get the text content of the <span> element
        price_text = price_element.text

        # Extract the price value
        info['price'] = int(price_text.split(" ")[0].replace('.', ''))

    except (NoSuchElementException, ValueError):
        pass

    # link
    try:
        # Find the <a> element within the <div class="RfADt">
        product_link_element = product_element.find_element(By.XPATH, ".//div[@class='RfADt']/a")

        # Get the href attribute of the <a> element
        product_link = product_link_element.get_attribute("href")

        # Extract the URL from the href attribute (drop the scheme)
        info['product_url'] = product_link.split("//")[1]

    except NoSuchElementException:
        pass

    # thumbnail
    try:
        # Find the <img> element within the <div class="_95X4G">
        image_element = product_element.find_element(By.XPATH, ".//div[@class='_95X4G']/a/div/img")

        # Get the src attribute of the <img> element
        info['image'] = image_element.get_attribute("src")

    except NoSuchElementException:
        pass

    # If we decide to get extra information
    if extra_info:
        # sales
        try:
            # Find the <span> element within the <div class="_6uN7R">
            sold_element = product_element.find_element(By.XPATH, ".//div[@class='_6uN7R']/span[@class='_1cEkb']/span[1]")

            # Get the text content of the <span> element
            info['sales'] = sold_element.text

        except (NoSuchElementException, ValueError):
            info['sales'] = 0

        # discount
        try:
            # Find the <span> element within the <div class="WNoq3">
            discount_element = product_element.find_element(By.XPATH, ".//div[@class='WNoq3']/span[@class='IcOsH']")

            # Get the text content of the <span> element
            info['discount'] = discount_element.text

        except (NoSuchElementException, ValueError):
            info['discount'] = '0'

    return info

### Function to scrape all products from a page
def get_lazada_product_info_from_page(page_url, extra_info=False):
    """
    Extract info from all products of a specific page_url on the Lazada website
    Args:
        page_url: (string) url of the page to scrape
    Returns:
        data: (list) a list of dictionaries of product info. If no products
        are found, return an empty list.
    """
    global DRIVER

    data = []
    DRIVER.get(page_url)  # Use the driver to get info from the product page
    time.sleep(3)

    try:
        # no_product_found = bool(DRIVER.find_element(By.XPATH, "//div[@class='style__StyledNotFoundProductView-sc-1uz0b49-0']"))
        no_product_found = bool(DRIVER.find_element(By.CLASS_NAME, 'style__StyledNotFoundProductView-sc-1uz0b49-0'))
        print("EMPTY PAGE")
        return data
    except NoSuchElementException:
        no_product_found = False

    # FIND ALL PRODUCT ITEMS
    products = DRIVER.find_elements(By.CLASS_NAME, 'Bm3ON')
    print(f'Found {len(products)} products')

    if (not no_product_found) and len(products) > 0:
        for i in products:
            product_dict = get_lazada_product_info_single(i, extra_info)
            data.append(product_dict)
    return data

### Function to get product info from a main category
def get_lazada_product_info_from_category(cat_url, max_page=0, extra_info=False):
    '''
    Scrape multiple pages of products of a category.
    Uses get_lazada_product_info_from_page().

    Args:
        cat_url: (string) a url string of a category
        max_page: (int) an integer denoting the maximum number of pages to scrape.
        Default value is 0 to scrape all pages.
    Returns:
        products: a list in which every element is a dictionary of one product's information
    '''
    products = []

    page_n = 1
    # cat_url may already contain a query string (e.g. '?q=...'), so append
    # further parameters with '&' in that case
    sep = '&' if '?' in cat_url else '?'
    cat_page_url = cat_url + f'{sep}page={page_n}'
    product_list = get_lazada_product_info_from_page(cat_page_url, extra_info=extra_info)

    while len(product_list) > 0:
        products.extend(product_list)
        page_n += 1

        # stop_flag = False if max_page <= 0 else (page_n > max_page)
        stop_flag = max_page > 0 and page_n > max_page  # For stopping the scrape according to max_page
        if stop_flag:
            break

        cat_page_url = cat_url + f'{sep}page={page_n}'
        product_list = get_lazada_product_info_from_page(cat_page_url, extra_info=extra_info)

    return products

def scrap_lazada(search_product, num_max_page, extra_info):

    start_driver(force_restart=True)

    url = 'https://www.lazada.vn/catalog/?q=' + search_product

    prod_data = []  # STORE YOUR PRODUCT INFO DICTIONARIES IN HERE

    # prod_per_cat = get_product_info_from_category(main_cat['URL'], num_max_page, extra_info=extra_info)
    prod_per_cat = get_lazada_product_info_from_category(url, num_max_page, extra_info=extra_info)
    prod_data.extend(prod_per_cat)

    close_driver()  # Close driver when we're done

    return prod_data
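The price parsing in get_lazada_product_info_single assumes Lazada renders prices with a dot as the thousands separator; a small sketch of that step with a hypothetical price string:

price_text = '120.000 ₫'  # hypothetical sample; the real text comes from the 'ooOxS' span
price = int(price_text.split(' ')[0].replace('.', ''))
print(price)  # 120000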
scraper_shopee.py
ADDED
@@ -0,0 +1,229 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Global driver to use throughout the script
DRIVER = None

# Wrapper to close the driver if it has been created
def close_driver():
    global DRIVER
    if DRIVER is not None:
        DRIVER.close()
        DRIVER = None

# Function to (re)start the driver
def start_driver(force_restart=False):
    global DRIVER

    if force_restart:
        close_driver()

    # Set up the driver to run Chrome headless, i.e. in the background
    # without opening a browser window
    options = Options()
    options.add_argument('-headless')
    options.add_argument('-no-sandbox')
    options.add_argument('-disable-dev-shm-usage')

    DRIVER = webdriver.Chrome(options=options)

### Function to extract product info from the necessary html and json tags
def get_shopee_product_info_single(product_element, extra_info):
    """
    Extract info from a single product element from the driver.
    Args:
        product_element: (WebDriverElement) the product whose info needs to be
        extracted.
    Returns:
        info: (dict) a dictionary of info of the product. Every product
        should at least have four pieces of information: name, price,
        link to the product page, and link to the product image.
    """
    info = {'source': 'shopee',
            'name': '',
            'price': -1,
            'product_url': '',
            'image': ''}
    print(product_element.get_attribute('outerHTML'))

    # name
    try:
        # Find the title element within the product card
        product_title_element = product_element.find_element(By.CLASS_NAME, "line-clamp-2")

        # Get the text content of the element
        info['name'] = product_title_element.text
        print(info['name'])

    except NoSuchElementException:
        info['name'] = ""

    # price
    try:
        # Find the price <span>; the leading "." keeps the XPath scoped to
        # this product element rather than the whole page
        price_element = product_element.find_element(By.XPATH, './/div[@class="truncate flex items-baseline"]/span[@class="text-base/5 truncate"]')

        # Get the text content of the <span> element
        price_text = price_element.text

        # Extract the price value
        info['price'] = int(price_text.split(" ")[0].replace('.', ''))
        print(info['price'])

    except (NoSuchElementException, ValueError):
        pass

    # link
    try:
        # Find the <a> element wrapping the product card
        product_link_element = product_element.find_element(By.XPATH, './/a[@class="contents"]')

        # Get the href attribute of the <a> element
        product_link = product_link_element.get_attribute("href")

        info['product_url'] = product_link

    except NoSuchElementException:
        pass

    # thumbnail
    try:
        # Find the <img> element of the product thumbnail
        image_element = product_element.find_element(By.XPATH, './/img[@class="inset-y-0 w-full h-full pointer-events-none object-contain absolute"]')

        # Get the src attribute of the <img> element
        info['image'] = image_element.get_attribute("src")

    except NoSuchElementException:
        pass

    # If we decide to get extra information
    if extra_info:
        # sales
        try:
            sold_element = product_element.find_element(By.XPATH, './/div[@class="truncate text-shopee-black87 text-xs min-h-4 flex-shrink-1"]')

            # Get the text content of the element
            info['sales'] = sold_element.text

        except (NoSuchElementException, ValueError):
            info['sales'] = 0

        # discount
        try:
            discount_element = product_element.find_element(By.XPATH, './/div[@class="truncate bg-shopee-voucher-yellow text-white leading-4 text-sp10"]')

            # Get the text content of the element
            info['discount'] = discount_element.text

        except (NoSuchElementException, ValueError):
            info['discount'] = '0'

    return info

### Function to scrape all products from a page
def get_shopee_product_info_from_page(page_url, extra_info=False):
    """
    Extract info from all products of a specific page_url on the Shopee website
    Args:
        page_url: (string) url of the page to scrape
    Returns:
        data: (list) a list of dictionaries of product info. If no products
        are found, return an empty list.
    """
    global DRIVER

    data = []
    DRIVER.get(page_url)  # Use the driver to get info from the product page
    time.sleep(3)

    try:
        # no_product_found = bool(DRIVER.find_element(By.XPATH, "//div[@class='style__StyledNotFoundProductView-sc-1uz0b49-0']"))
        no_product_found = bool(DRIVER.find_element(By.CLASS_NAME, 'style__StyledNotFoundProductView-sc-1uz0b49-0'))
        print("EMPTY PAGE")
        return data
    except NoSuchElementException:
        no_product_found = False

    # FIND ALL PRODUCT ITEMS
    # products = DRIVER.find_elements(By.XPATH, "//a[@class='product-item']")
    # By.CLASS_NAME cannot match the compound class
    # 'col-xs-2-4 shopee-search-item-result__item', so use a CSS selector
    products = DRIVER.find_elements(By.CSS_SELECTOR, '.col-xs-2-4.shopee-search-item-result__item')
    print(f'Found {len(products)} products')
    print(products)

    if (not no_product_found) and len(products) > 0:
        for i in products:
            product_dict = get_shopee_product_info_single(i, extra_info)
            print(i)
            print(product_dict)
            data.append(product_dict)
    return data

### Function to get product info from a main category
def get_shopee_product_info_from_category(cat_url, max_page=0, extra_info=False):
    '''
    Scrape multiple pages of products of a category.
    Uses get_shopee_product_info_from_page().

    Args:
        cat_url: (string) a url string of a category
        max_page: (int) an integer denoting the maximum number of pages to scrape.
        Default value is 0 to scrape all pages.
    Returns:
        products: a list in which every element is a dictionary of one product's information
    '''
    products = []

    page_n = 1
    # cat_url may already contain a query string (e.g. '?keyword=...'),
    # so append further parameters with '&' in that case
    sep = '&' if '?' in cat_url else '?'
    cat_page_url = cat_url + f'{sep}page={page_n}'
    product_list = get_shopee_product_info_from_page(cat_page_url, extra_info=extra_info)

    while len(product_list) > 0:
        products.extend(product_list)
        page_n += 1

        # stop_flag = False if max_page <= 0 else (page_n > max_page)
        stop_flag = max_page > 0 and page_n > max_page  # For stopping the scrape according to max_page
        if stop_flag:
            break

        cat_page_url = cat_url + f'{sep}page={page_n}'
        product_list = get_shopee_product_info_from_page(cat_page_url, extra_info=extra_info)

    return products

def scrap_shopee(search_product, num_max_page, extra_info):

    # # Test Shopee
    # start_driver()
    # URL = 'https://shopee.vn/search?keyword=megaduo&page=0&sortBy=relevancy'
    # DRIVER.get(URL)
    # time.sleep(3)
    # print(URL)
    # products = DRIVER.find_elements(By.CLASS_NAME, 'shopee-search-item-result')
    # # products = DRIVER.find_element("css selector", 'li[class="col-xs-2-4 shopee-search-item-result__item"]')
    # product = products[0]
    # # Wait for the element to be present on the page
    # info = get_shopee_product_info_single(product, True)
    # print(info)

    start_driver(force_restart=True)

    url = 'https://shopee.vn/search?keyword=' + search_product

    prod_data = []  # STORE YOUR PRODUCT INFO DICTIONARIES IN HERE

    # prod_per_cat = get_product_info_from_category(main_cat['URL'], num_max_page, extra_info=extra_info)
    prod_per_cat = get_shopee_product_info_from_category(url, num_max_page, extra_info=extra_info)
    prod_data.extend(prod_per_cat)

    close_driver()  # Close driver when we're done

    return prod_data
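The pagination in get_shopee_product_info_from_category (and its Lazada and Tiki twins) keeps requesting pages until one comes back empty or the max_page cap is exceeded, with max_page <= 0 meaning no cap. A tiny sketch of that stop condition:

def should_stop(page_n, max_page):
    # Mirrors the loop's stop_flag above
    return max_page > 0 and page_n > max_page

print(should_stop(2, 1))   # True: the one-page cap has been reached
print(should_stop(50, 0))  # False: max_page=0 scrapes until an empty page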
scraper_tiki.py
ADDED
@@ -0,0 +1,283 @@
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
import time

# Global driver to use throughout the script
DRIVER = None

# Wrapper to close the driver if it has been created
def close_driver():
    global DRIVER
    if DRIVER is not None:
        DRIVER.close()
        DRIVER = None

# Function to (re)start the driver
def start_driver(force_restart=False):
    global DRIVER

    if force_restart:
        close_driver()

    # Set up the driver to run Chrome headless, i.e. in the background
    # without opening a browser window
    options = Options()
    options.add_argument('-headless')
    options.add_argument('-no-sandbox')
    options.add_argument('-disable-dev-shm-usage')

    DRIVER = webdriver.Chrome(options=options)

### Function to extract product info from the necessary html and json tags
def get_tiki_product_info_single(product_element, extra_info):
    """
    Extract info from a single product element from the driver.
    Args:
        product_element: (WebDriverElement) the product whose info needs to be
        extracted.
    Returns:
        info: (dict) a dictionary of info of the product. Every product
        should at least have four pieces of information: name, price,
        link to the product page, and link to the product image.
    """
    info = {'source': 'tiki',
            'name': '',
            'price': -1,
            'product_url': '',
            'image': ''}
    # print(product_element.get_attribute('outerHTML'))

    # name
    try:
        # name = product_element.find_element(By.XPATH, ".//div[@class='name']/h3")
        # name = product_element.find_element(By.CLASS_NAME, 'style__NameStyled-sc-139nb47-8 ibOlar').find_element(By.TAG_NAME, 'h3')

        name = product_element.find_element(By.CLASS_NAME, 'name').find_element(By.TAG_NAME, 'h3')

        info['name'] = name.get_attribute('innerHTML').strip()

    except NoSuchElementException:

        # Find the <h3> element by class name
        name = product_element.find_element(By.CLASS_NAME, 'style__NameStyled-sc-139nb47-8')

        # Get the text content of the element
        info['name'] = name.text

    # price = product_element.find_element(By.CLASS_NAME, 'price-discount__price').get_attribute('innerHTML')
    # print(price)

    # price
    try:
        # price = product_element.find_element(By.CLASS_NAME, 'price-discount__price').get_attribute('innerHTML').strip()
        price = product_element.find_element(By.CLASS_NAME, 'price-discount__price').get_attribute('innerHTML')
        # price = product_element.find_element(By.XPATH, ".//div[@class='price-discount__price']").get_attribute('innerHTML')

        info['price'] = int(price.replace('<sup>₫</sup>', '').replace('.', ''))
        # info['price'] = int(re.sub(r'[\.\s₫]', '', price))  # With regex
        # info['price'] = int(''.join([c for c in price if c not in '.₫ ']))  # Without regex
    except (NoSuchElementException, ValueError):
        pass

    # link
    try:
        product_link = product_element.get_attribute('href')
        info['product_url'] = product_link
    except NoSuchElementException:
        pass

    # thumbnail
    try:
        # thumbnail = product_element.find_elements(By.XPATH, ".//div[@class='thumbnail']//child::img")[-1]

        # thumbnail = product_element.find_element(By.CLASS_NAME, 'thumbnail').find_element(By.TAG_NAME, 'img')
        # info['image'] = thumbnail.get_attribute('src')

        # Find the <div> element with class "image-wrapper"
        image_div = product_element.find_element(By.CLASS_NAME, 'image-wrapper')

        # Find the <img> element within the <div> element
        img_element = image_div.find_element(By.TAG_NAME, 'img')

        # Get the value of the "srcset" attribute
        srcset_value = img_element.get_attribute('srcset')

        # Extract the link of the image from the srcset value
        image_link = srcset_value.split(',')[0].split(' ')[0]
        info['image'] = image_link

    except NoSuchElementException:
        pass

    # If we decide to get extra information
    if extra_info:
        # sales
        try:
            # sales_elem = product_element.find_element(By.XPATH, ".//div[@class='styles__StyledQtySold-sc-732h27-2']")
            # sales_elem = product_element.find_element(By.CLASS_NAME, 'quantity has-border')
            # info['sales'] = sales_elem
            # info['sales'] = int(re.sub(r'\D', '', sales_elem.get_attribute('innerHTML')))

            # Find the <span> element with class "quantity"
            quantity_span = product_element.find_element(By.CLASS_NAME, 'quantity')

            # Get the text content of the element
            info['sales'] = quantity_span.text

        except (NoSuchElementException, ValueError):
            info['sales'] = 0

        # # rating
        # try:
        #     # rating = product_element.find_element(By.XPATH, ".//div[@class='average']").get_attribute('style')
        #     rating = product_element.find_element(By.CLASS_NAME, 'average').get_attribute('style')
        #     # info['rating'] = float(re.sub(r'\D','', rating))/100*5  # With regex
        #     info['rating'] = float(''.join([c for c in rating if c.isdigit()]))/100*5  # Without regex
        # except NoSuchElementException:
        #     info['rating'] = 0

        # discount
        try:
            # Try to get the discount using the class name
            discount = product_element.find_element(By.CLASS_NAME, 'price-discount__discount').get_attribute('innerHTML')
            info['discount'] = discount.replace('-', '')  # Remove any dashes

        except (NoSuchElementException, ValueError):
            try:
                # Try to get the discount using another method
                discount_div = product_element.find_element(By.CLASS_NAME, 'style__DiscountPercentStyled-sc-e9h7mj-1')
                info['discount'] = discount_div.text.replace('-', '')  # Remove any dashes

            except NoSuchElementException:
                # If both attempts fail, set the discount to 0
                info['discount'] = '0'

        # # tiki now
        # try:
        #     info['tiki_now'] = bool(product_element.find_element(By.CLASS_NAME, 'badge-service').find_element(By.CLASS_NAME, 'item'))
        # except NoSuchElementException:
        #     info['tiki_now'] = False

        # # freeship, official seller, and/or trusted seller
        # try:
        #     info['freeship'] = False
        #     info['official'] = False
        #     info['trusted'] = False
        #     thumbnail_tag = product_element.find_element(By.CLASS_NAME, 'thumbnail')
        #     list_img = thumbnail_tag.find_elements(By.TAG_NAME, 'img')
        #     # list_img = product_element.find_elements(By.XPATH, ".//div[@class='thumbnail']/img")
        #     for img in list_img:
        #         if img.get_attribute('src') == 'https://salt.tikicdn.com/ts/upload/dc/0d/49/3251737db2de83b74eba8a9ad6d03338.png':
        #             info['freeship'] = True
        #         elif img.get_attribute('src') == 'https://salt.tikicdn.com/ts/upload/b9/1f/4b/557eac9c67a4466ccebfa74cde854215.png':
        #             info['official'] = True
        #         elif img.get_attribute('src') == 'https://salt.tikicdn.com/ts/upload/e0/41/da/bb0fc684a838eff5e264ce0534a148f0.png':
        #             info['trusted'] = True
        # except NoSuchElementException:
        #     pass

        # # under price
        # try:
        #     # info['under_price'] = bool(product_element.find_element(By.XPATH, ".//div[@class='badge-under-price']/child::div[@class='item']"))
        #     info['under_price'] = bool(product_element.find_element(By.CLASS_NAME, 'badge-under-price').find_element(By.CLASS_NAME, 'item'))
        # except NoSuchElementException:
        #     info['under_price'] = False

        # # installment
        # try:
        #     # info['installment'] = bool(product_element.find_element(By.XPATH, ".//div[@class='badge-benefits']//child::img[1]"))
        #     info['installment'] = bool(product_element.find_element(By.CLASS_NAME, 'badge-benefits').find_element(By.TAG_NAME, 'img'))
        # except NoSuchElementException:
        #     info['installment'] = False

        # # gift
        # try:
        #     # info['gift'] = bool(product_element.find_element(By.XPATH, ".//div[@class='freegift-list']"))
        #     info['gift'] = bool(product_element.find_element(By.CLASS_NAME, 'freegift-list'))
        # except NoSuchElementException:
        #     info['gift'] = False

    return info


### Function to scrape all products from a page
def get_tiki_product_info_from_page(page_url, extra_info=False):
    """
    Extract info from all products of a specific page_url on the Tiki website
    Args:
        page_url: (string) url of the page to scrape
    Returns:
        data: (list) a list of dictionaries of product info. If no products
        are found, return an empty list.
    """
    global DRIVER

    data = []
    DRIVER.get(page_url)  # Use the driver to get info from the product page
    time.sleep(3)

    try:
        # no_product_found = bool(DRIVER.find_element(By.XPATH, "//div[@class='style__StyledNotFoundProductView-sc-1uz0b49-0']"))
        no_product_found = bool(DRIVER.find_element(By.CLASS_NAME, 'style__StyledNotFoundProductView-sc-1uz0b49-0'))
        print("EMPTY PAGE")
        return data
    except NoSuchElementException:
        no_product_found = False

    # FIND ALL PRODUCT ITEMS
    # products = DRIVER.find_elements(By.XPATH, "//a[@class='product-item']")
    products = DRIVER.find_elements(By.CLASS_NAME, 'product-item')
    print(f'Found {len(products)} products')

    if (not no_product_found) and len(products) > 0:
        for i in products:
            product_dict = get_tiki_product_info_single(i, extra_info)
            data.append(product_dict)
    return data

### Function to get product info from a main category
def get_tiki_product_info_from_category(cat_url, max_page=0, extra_info=False):
    '''
    Scrape multiple pages of products of a category.
    Uses get_tiki_product_info_from_page().

    Args:
        cat_url: (string) a url string of a category
        max_page: (int) an integer denoting the maximum number of pages to scrape.
        Default value is 0 to scrape all pages.
    Returns:
        products: a list in which every element is a dictionary of one product's information
    '''
    products = []

    page_n = 1
    # cat_url may already contain a query string (e.g. '?q=...'), so append
    # further parameters with '&' in that case
    sep = '&' if '?' in cat_url else '?'
    cat_page_url = cat_url + f'{sep}page={page_n}'
    product_list = get_tiki_product_info_from_page(cat_page_url, extra_info=extra_info)

    while len(product_list) > 0:
        products.extend(product_list)
        page_n += 1

        # stop_flag = False if max_page <= 0 else (page_n > max_page)
        stop_flag = max_page > 0 and page_n > max_page  # For stopping the scrape according to max_page
        if stop_flag:
            break

        cat_page_url = cat_url + f'{sep}page={page_n}'
        product_list = get_tiki_product_info_from_page(cat_page_url, extra_info=extra_info)

    return products

def scrap_tiki(search_product, num_max_page, extra_info):

    start_driver(force_restart=True)

    url = 'https://tiki.vn/search?sort=default&q="' + search_product + '"'

    prod_data = []  # STORE YOUR PRODUCT INFO DICTIONARIES IN HERE

    # prod_per_cat = get_product_info_from_category(main_cat['URL'], num_max_page, extra_info=extra_info)
    prod_per_cat = get_tiki_product_info_from_category(url, num_max_page, extra_info=extra_info)
    prod_data.extend(prod_per_cat)
    close_driver()  # Close driver when we're done

    return prod_data
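The Tiki thumbnail handling takes the first URL listed in the image's srcset attribute; a short sketch of that step with a hypothetical srcset value:

srcset_value = 'https://example.com/img-280.jpg 1x, https://example.com/img-560.jpg 2x'  # hypothetical
image_link = srcset_value.split(',')[0].split(' ')[0]
print(image_link)  # https://example.com/img-280.jpg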