Spaces:
Sleeping
Sleeping
"""Scrape every page of the inventory table at ``url`` using Selenium.

The script starts a Chrome webdriver, loads the page, and clicks the modal
pop-up that is shown on load.  It then walks the paginated table: on each
page it reads the rows of the last ``dataTables_scrollBody`` element and
appends one ``{'name', 'region', 'description'}`` dict per data row to
``dai_values``.  After each page it looks up the 'Next' button; when the
button is missing or its class contains ``disabled`` the loop ends, otherwise
the script scrolls the button into view, waits 2 seconds, and clicks it.
The webdriver is always quit at the end, even if scraping fails.
"""

# Import necessary libraries
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Define the URL of the webpage to be scraped
url = 'https://www.hdrn.ca/en/inventory/'

# Initialize a Chrome webdriver
driver = webdriver.Chrome()

# Define a wait time (seconds) for the driver to locate web elements
wait = WebDriverWait(driver, 2)

# Create an empty list to store the scraped data
dai_values = []

try:
    driver.get(url)

    # Click the modal pop-up that appears when the webpage is loaded.
    # Polling through `wait` tolerates the modal rendering slightly late.
    wait.until(lambda d: d.find_element(By.ID, 'myModal')).click()

    # Loop through the webpage to scrape data from each page
    while True:
        # Locate the data tables (an empty list is falsy, so the wait keeps
        # polling until at least one exists) and use the last one.
        scroll_bodies = wait.until(
            lambda d: d.find_elements(By.CLASS_NAME, 'dataTables_scrollBody')
        )
        table_rows = scroll_bodies[-1].find_elements(By.TAG_NAME, 'tr')

        # Loop through each row of the table
        for row in table_rows:
            cells = row.find_elements(By.TAG_NAME, 'td')
            # Rows without the three expected cells (e.g. header rows that
            # contain no <td>) cannot be unpacked below, so skip them.
            if len(cells) < 3:
                continue
            # Only the first three cells are used; extra cells are ignored
            # instead of breaking the unpacking.
            name, region, description = cells[:3]
            dai_values.append({
                'name': name.text,
                'region': region.text,
                'description': description.text
            })

        # Find the 'Next' button; stop when it is absent or disabled
        next_buttons = driver.find_elements(By.ID, 'thelist_next')
        if not next_buttons:
            break
        next_button = next_buttons[0]
        # get_attribute may return None when the attribute is missing
        if 'disabled' in (next_button.get_attribute('class') or ''):
            break

        # Scroll the button into view, wait 2 seconds, then click it to move
        # on to the next page.
        driver.execute_script("arguments[0].scrollIntoView();", next_button)
        time.sleep(2)
        next_button.click()
finally:
    # Quit the webdriver even when scraping raised, so the browser process
    # is not left running.
    driver.quit()