NZProperty / properties.py
NZLouislu's picture
Add code to fetch property data for Auckland, NZ.
3624bf2
import requests
from bs4 import BeautifulSoup
from property_history import fetch_property_history
from config.supabase_config import insert_property_and_history # Assuming this function exists
# Fixed city and suburb
# CITY = "Porirua City"
# SUBURB = "Aotea"
# Fetch property details
def fetch_property_details(property_url, title, city, suburb, timeout=30):
    """Download one property page and build the records to store for it.

    Args:
        property_url: URL of the property detail page to download.
        title: Listing title. The text before the first comma becomes the
            first address line; the text after the last comma becomes the
            postcode.
        city: City name, copied unchanged into the returned record.
        suburb: Suburb name, copied unchanged into the returned record.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the caller forever.

    Returns:
        A ``(property_data, history_data)`` tuple. ``property_data`` is a
        dict of scraped fields and ``history_data`` is the ``'history'``
        entry of what ``fetch_property_history`` returns. When the page
        cannot be fetched (network error, timeout, or a non-200 status)
        the result is ``(None, None)``.
    """
    print(f"\nFetching details for {property_url}...")
    try:
        response = requests.get(property_url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat transport failures like a bad status code: report and skip.
        print(f"Failed to fetch details for: {property_url} ({exc})")
        return None, None

    if response.status_code != 200:
        print(f"Failed to fetch details for: {property_url}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    # The title supplies the first address line and the postcode; the
    # second address line comes from the page itself.
    title_parts = title.split(',')
    address_line1 = title_parts[0].strip()
    postcode = title_parts[-1].strip()

    line2_tag = soup.find('span', {'testid': 'addressLine2'})
    address_line2 = line2_tag.get_text(strip=True) if line2_tag else 'N/A'

    # Combine both address lines into a single address field.
    address = f"{address_line1}, {address_line2}"

    # Numeric attributes fall back to None when missing or non-numeric.
    year_built = _int_from_testid(soup, 'div', 'yearBuiltValue')
    bedrooms = _int_from_testid(soup, 'span', 'bed')
    bathrooms = _int_from_testid(soup, 'span', 'bath')
    car_spaces = _int_from_testid(soup, 'span', 'car')

    # Size attributes are kept as display text and fall back to 'N/A'.
    floor_size = _span_text_by_class(
        soup, 'floor PropertyAttributes_attribute__3bkWm')
    land_area = _span_text_by_class(
        soup, 'land PropertyAttributes_attribute__3bkWm')

    last_sold_price, last_sold_date = parse_sold_details(soup)
    capital_value = extract_value(soup, 'Capital Value')
    land_value = extract_value(soup, 'Land Value')
    improvement_value = extract_value(soup, 'Improvement Value')

    # Rental history is parsed by the property_history module.
    rental_history = fetch_property_history(soup)

    # Property record prepared for insertion into Supabase.
    property_data = {
        'property_url': property_url,
        'address': address,
        'suburb': suburb,
        'city': city,
        'postcode': postcode,
        'year_built': year_built,
        'bedrooms': bedrooms,
        'bathrooms': bathrooms,
        'car_spaces': car_spaces,
        'floor_size': floor_size,
        'land_area': land_area,
        'last_sold_price': last_sold_price,
        'last_sold_date': last_sold_date,
        'capital_value': capital_value,
        'land_value': land_value,
        'improvement_value': improvement_value,
        'has_rental_history': rental_history['has_rental_history'],
        'is_currently_rented': rental_history['is_currently_rented'],
    }

    # History records prepared for insertion into Supabase.
    history_data = rental_history['history']
    return property_data, history_data


def _int_from_testid(soup, tag_name, testid):
    """Return the integer text of the first matching tag, or None."""
    try:
        return int(soup.find(tag_name, {'testid': testid}).get_text(strip=True))
    except (AttributeError, ValueError):
        # AttributeError: tag not found; ValueError: text is not an integer.
        return None


def _span_text_by_class(soup, css_class):
    """Return the stripped text of the first matching span, or 'N/A'."""
    try:
        return soup.find('span', class_=css_class).get_text(strip=True)
    except AttributeError:
        return 'N/A'
# Step 3: Parse sold details
def parse_sold_details(soup):
    """Extract the last sale price and date from the 'last sold' element.

    The text of the ``<strong testid="lastSoldAttribute">`` element is
    expected to contain both the marker ``for`` (before the price) and the
    marker ``on`` (before the date), in either order, e.g.
    ``"Last Sold on 12 Jan 2020 for $500,000"``.

    Args:
        soup: Parsed page exposing ``find``.

    Returns:
        ``(price, date)`` where ``price`` is a float, or None if the price
        text is not numeric, and ``date`` is the date text as it appears on
        the page. ``(None, None)`` when the element is missing or lacks
        either marker.
    """
    sold_tag = soup.find('strong', {'testid': 'lastSoldAttribute'})
    if sold_tag is None:
        return None, None

    text = sold_tag.get_text(strip=True)
    if 'for' not in text or 'on' not in text:
        return None, None

    if 'Last Sold on' in text:
        # Primary layout: "Last Sold on <date> for <price>".
        date_part = text.replace('Last Sold on', '').split('for')[0]
        price_part = text.split('for')[-1]
    else:
        # Other layouts: slice between the last 'for' and the last 'on' so
        # neither field drags the other one along with it.
        for_at = text.rfind('for')
        on_at = text.rfind('on')
        if for_at < on_at:
            # "<...> for <price> on <date>"
            price_part = text[for_at + len('for'):on_at]
            date_part = text[on_at + len('on'):]
        else:
            # "<...> on <date> for <price>"
            price_part = text[for_at + len('for'):]
            date_part = text[on_at + len('on'):for_at]

    # Strip the currency symbol and thousands separators before converting.
    price_text = price_part.strip().replace('$', '').replace(',', '')
    try:
        last_sold_price = float(price_text)
    except ValueError:
        last_sold_price = None  # Price could not be parsed.

    return last_sold_price, date_part.strip()
# Helper to extract values like Capital Value, Land Value, Improvement Value
def extract_value(soup, value_type):
    """Return the text of the ``div`` that follows a labelled ``div``.

    Looks up the ``div`` whose string equals ``value_type`` (for example
    'Capital Value'), then reads the stripped text of its next sibling
    ``div``. If any step of that lookup hits a missing element, the
    placeholder 'N/A' is returned instead.
    """
    try:
        label_div = soup.find('div', string=value_type)
        value_div = label_div.find_next_sibling('div')
        text = value_div.get_text(strip=True)
    except AttributeError:
        # A missing label or sibling yields None, which has no such method.
        return 'N/A'
    return text