Spaces:

NZLouislu
/

NZProperty

Paused

App Files Files Community

NZProperty / fetch_property_links.py

NZLouislu

Add code for fetching property data for Auckland, NZ.

3624bf2 10 months ago

raw

history blame contribute delete

2.02 kB

	import requests
	from bs4 import BeautifulSoup
	import time
	from requests.adapters import HTTPAdapter
	from urllib3.util.retry import Retry

	def fetch_property_links(main_url, page=1, max_retries=3):
	    """Fetch one listing page and collect the property links found on it.

	    Args:
	        main_url: URL of the listing page to scrape.
	        page: Page number to request. Page 1 uses ``main_url`` unchanged;
	            later pages add a ``page`` query parameter.
	        max_retries: Total number of retries the HTTP adapter performs for
	            responses with status 500, 502, 503 or 504.

	    Returns:
	        A ``(property_links, titles)`` tuple of two parallel lists. Both are
	        empty when the request fails; request errors are printed, not raised.

	    Note:
	        The function always sleeps for 2 seconds before returning, whether
	        the request succeeded or not, to avoid overloading the server.
	    """
	    property_links = []
	    titles = []

	    if page > 1:
	        # Respect a query string that is already present on main_url.
	        separator = "&" if "?" in main_url else "?"
	        url = f"{main_url}{separator}page={page}"
	    else:
	        url = main_url
	    print(f"Fetching page {page}...")

	    # Retry transient server errors with a short exponential backoff.
	    retries = Retry(total=max_retries,
	                    backoff_factor=0.1,
	                    status_forcelist=[500, 502, 503, 504])
	    adapter = HTTPAdapter(max_retries=retries)

	    try:
	        # The context manager closes the session's connection pool on exit.
	        with requests.Session() as session:
	            session.mount('https://', adapter)
	            session.mount('http://', adapter)

	            response = session.get(url, timeout=30)
	            response.raise_for_status()  # raises for 4xx/5xx responses

	            if response.status_code == 200:
	                soup = BeautifulSoup(response.text, 'html.parser')

	                # Collect every property-card anchor on the page.
	                for link in soup.find_all('a', class_='PropertyCard_PropertyCardLink__icVIl'):
	                    href = link.get('href')
	                    if not href:
	                        # An anchor without a target cannot yield a link;
	                        # skip it instead of failing the whole page.
	                        continue
	                    property_links.append("https://propertyvalue.co.nz" + href)
	                    # Keep titles aligned with links even if the attribute is missing.
	                    titles.append(link.get('title', ''))

	                print(f"\nFound {len(property_links)} properties on page {page}:")
	            else:
	                # Reached for non-200 responses that raise_for_status lets through.
	                print(f"Unexpected status code {response.status_code} for URL: {url}")

	    except requests.exceptions.RequestException as e:
	        print(f"Error fetching page {page}: {e}")

	    finally:
	        time.sleep(2)  # 2-second delay to avoid overloading the server

	    return property_links, titles

	# Usage example: scrape the first listing page and report how many
	# property links it contains.
	if __name__ == "__main__":
	    # Only the links are needed here; the titles list is discarded.
	    links, _ = fetch_property_links(
	        "https://propertyvalue.co.nz/wellington/wellington-city/khandallah-6035/200020"
	    )
	    print(f"Total properties found: {len(links)}")