Spaces:

NZLouislu
/

NZProperty

Paused

App Files Files Community

NZProperty / main.py

NZLouislu

Add code to fetch property data for Auckland, NZ.

3624bf2 8 months ago

raw

history blame contribute delete

3.88 kB

	import time
	import requests
	from bs4 import BeautifulSoup
	from fetch_property_links import fetch_property_links
	from properties import fetch_property_details
	# from config.redis_config import create_redis_client, check_property_in_redis, add_property_to_redis
	from config.supabase_config import insert_property_and_history

	# Main function to scrape properties

	def fetch_suburbs(url, city):
	    """
	    Crawl a suburb index page and scrape every suburb linked from it.

	    Downloads ``url`` and looks for the ``div`` whose ``testid`` attribute is
	    ``suburbLinksContainer``. For every ``<a>`` inside it, the suburb page is
	    downloaded, the number of result pages is read from its pagination
	    widget, and the suburb is handed to ``scrape_properties``.

	    Args:
	        url: Address of the page that lists the suburbs.
	        city: City label forwarded unchanged to ``scrape_properties``.

	    Returns:
	        None. Progress is reported with ``print``. If the index page does not
	        answer with HTTP 200, or the container is missing, nothing is scraped.
	        Suburb pages that do not answer with HTTP 200 are skipped.
	    """
	    response = requests.get(url)
	    if response.status_code != 200:
	        return

	    soup = BeautifulSoup(response.content, 'html.parser')
	    suburb_links_container = soup.find('div', {'testid': 'suburbLinksContainer'})
	    if suburb_links_container is None:
	        return

	    for link in suburb_links_container.find_all('a'):
	        suburb_name = link.get_text(strip=True)
	        href = link.get('href')
	        if not href:
	            # An anchor without an href cannot be followed; concatenating
	            # None onto the base URL would raise TypeError and end the crawl.
	            print(f" No link found for {suburb_name}")
	            continue

	        suburb_link = "https://propertyvalue.co.nz" + href
	        print(f"Suburb: {suburb_name}, Link: {suburb_link}")

	        # Fetch the page content for the suburb link
	        suburb_response = requests.get(suburb_link)
	        print(f" Status code for {suburb_name}: {suburb_response.status_code}")
	        if suburb_response.status_code != 200:
	            continue

	        suburb_soup = BeautifulSoup(suburb_response.content, 'html.parser')

	        # Start from a single page so that every path below leaves max_page
	        # defined for this suburb (never unset, never a stale value carried
	        # over from the previous suburb).
	        max_page = 1

	        # The pagination widget is the div with role='group' and class 'btn-group'
	        pagination = suburb_soup.find('div', {'role': 'group', 'class': 'btn-group'})
	        if pagination is None:
	            print(f" No pagination element found for {suburb_name}")
	        else:
	            # The page count is the label that follows the label reading "of"
	            of_label = pagination.find('label', string='of')
	            count_label = of_label.find_next_sibling('label') if of_label is not None else None
	            if count_label is None:
	                print(f" No page numbers found for {suburb_name}")
	            else:
	                count_text = count_label.get_text(strip=True)
	                try:
	                    max_page = int(count_text)
	                except ValueError:
	                    # Keep the single-page default rather than abort the crawl
	                    print(f" Unreadable page count {count_text!r} for {suburb_name}")
	                else:
	                    print(f"Suburb: {suburb_name}, Max Pages: {max_page}")

	        scrape_properties(suburb_link, max_page, city, suburb_name)

	def scrape_properties(main_url, max_pages, city, suburb):
	    """
	    Walk the listing pages of one suburb and store every property found.

	    For each page number from 1 through ``max_pages`` this asks
	    ``fetch_property_links`` for the property URLs and titles on that page,
	    loads the details and history of each property with
	    ``fetch_property_details``, and passes both to
	    ``insert_property_and_history``.

	    Args:
	        main_url: Suburb listing URL handed to ``fetch_property_links``.
	        max_pages: Number of the last listing page to visit (inclusive).
	        city: City label forwarded to ``fetch_property_details``.
	        suburb: Suburb label forwarded to ``fetch_property_details``.

	    NOTE: no de-duplication or throttling happens here; the Redis lookup and
	    the delay between requests are currently disabled.
	    """
	    stop_before = max_pages + 1
	    for page_number in range(1, stop_before):
	        # One request per listing page yields parallel sequences of links and titles
	        page_links, page_titles = fetch_property_links(main_url, page_number)

	        for detail_url, listing_title in zip(page_links, page_titles):
	            print(f"Fetching details for: {listing_title}")

	            # Pull the property record together with its history ...
	            record, history = fetch_property_details(detail_url, listing_title, city, suburb)

	            # ... and persist both in Supabase
	            insert_property_and_history(record, history)

	# Script entry point: start the crawl from the hard-coded suburb index URL
	# when this file is executed directly (not when it is imported).
	if __name__ == "__main__":
	    city = "Auckland - City"
	    fetch_suburbs(
	        url="https://www.propertyvalue.co.nz/auckland/auckland/7",
	        city=city,
	    )