# NZProperty / main.py
# Committed by NZLouislu (commit 3624bf2):
# Add code to fetch property data for Auckland, NZ.
import time
import requests
from bs4 import BeautifulSoup
from fetch_property_links import fetch_property_links
from properties import fetch_property_details
# from config.redis_config import create_redis_client, check_property_in_redis, add_property_to_redis
from config.supabase_config import insert_property_and_history
# Main function to scrape properties
def _read_max_page(suburb_soup, suburb_name):
    """Return the page count shown by a suburb page's pagination widget.

    Always returns an int so the caller never sees an unset or stale value:
    1 is returned when the pagination element is missing, when the "of"
    label or the label after it is missing, or when that label's text is
    not an integer.
    """
    pagination = suburb_soup.find('div', {'role': 'group', 'class': 'btn-group'})
    if not pagination:
        print(f" No pagination element found for {suburb_name}")
        return 1

    # The widget is expected to contain a label reading "of" followed by a
    # label holding the last page number.
    of_label = pagination.find('label', string='of')
    count_label = of_label.find_next_sibling('label') if of_label else None
    if count_label:
        try:
            max_page = int(count_label.get_text(strip=True))
        except ValueError:
            # Non-numeric text: treat it the same as a missing page count.
            pass
        else:
            print(f"Suburb: {suburb_name}, Max Pages: {max_page}")
            return max_page

    print(f" No page numbers found for {suburb_name}")
    return 1


def fetch_suburbs(url, city, timeout=30):
    """
    Fetch the suburb links listed at ``url`` and scrape every suburb found.

    Each anchor inside the suburb-links container is processed in document
    order: the suburb page is downloaded, its page count is read from the
    pagination widget (defaulting to 1), and ``scrape_properties`` is called
    for that suburb. Suburbs whose page does not answer with HTTP 200 are
    skipped.

    Args:
        url: Page that lists the suburbs.
        city: City label passed through unchanged to ``scrape_properties``.
        timeout: Seconds to wait for each HTTP response; without it a
            stalled connection would block the crawl indefinitely.

    Returns:
        None. Progress is reported with ``print``.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to fetch suburb list from {url}: status {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'html.parser')
    suburb_links_container = soup.find('div', {'testid': 'suburbLinksContainer'})
    if not suburb_links_container:
        print(f"No suburb links container found at {url}")
        return

    for link in suburb_links_container.find_all('a'):
        href = link.get('href')
        if not href:
            # An anchor without a target cannot be turned into a suburb URL.
            continue

        suburb_name = link.get_text(strip=True)
        suburb_link = "https://propertyvalue.co.nz" + href
        print(f"Suburb: {suburb_name}, Link: {suburb_link}")

        # Fetch the page content for the suburb link
        suburb_response = requests.get(suburb_link, timeout=timeout)
        print(f" Status code for {suburb_name}: {suburb_response.status_code}")
        if suburb_response.status_code != 200:
            continue

        suburb_soup = BeautifulSoup(suburb_response.content, 'html.parser')
        max_page = _read_max_page(suburb_soup, suburb_name)
        scrape_properties(suburb_link, max_page, city, suburb_name)
def scrape_properties(main_url, max_pages, city, suburb):
    """Scrape every listing page of one suburb and store each property.

    Pages 1 through ``max_pages`` (inclusive) of ``main_url`` are requested
    via ``fetch_property_links``. For every (link, title) pair returned,
    the property's details and history are fetched with
    ``fetch_property_details`` and written with
    ``insert_property_and_history``.

    Args:
        main_url: Suburb listing URL handed to ``fetch_property_links``.
        max_pages: Number of the last listing page to visit.
        city: City label forwarded to ``fetch_property_details``.
        suburb: Suburb label forwarded to ``fetch_property_details``.
    """
    # NOTE: Redis-based de-duplication (create_redis_client /
    # check_property_in_redis / add_property_to_redis from
    # config.redis_config) and a per-request delay are currently disabled,
    # so every listing is fetched and inserted on every run.
    for page_number in range(1, max_pages + 1):
        # Links and titles come back as two parallel sequences for this page.
        page_links, page_titles = fetch_property_links(main_url, page_number)

        for listing_url, listing_title in zip(page_links, page_titles):
            print(f"Fetching details for: {listing_title}")

            # Details arrive as a (property, history) pair.
            details = fetch_property_details(listing_url, listing_title, city, suburb)
            property_record, history_records = details

            # Persist both parts in Supabase.
            insert_property_and_history(property_record, history_records)
# Script entry point: crawl the suburb index below and scrape each suburb,
# labelling every property with the city name.
if __name__ == "__main__":
    city = "Auckland - City"
    start_url = "https://www.propertyvalue.co.nz/auckland/auckland/7"
    fetch_suburbs(start_url, city)