import requests
from bs4 import BeautifulSoup
from property_history import fetch_property_history
from config.supabase_config import insert_property_and_history # Assuming this function exists
# City and suburb were previously hard-coded constants; they are now passed
# in as arguments to fetch_property_details.
# CITY = "Porirua City"
# SUBURB = "Aotea"
# Fetch property details
def fetch_property_details(property_url, title, city, suburb):
    """Fetch a property detail page and parse it into Supabase-ready records.

    Args:
        property_url: URL of the property detail page to scrape.
        title: Listing title (comma-separated); address line 1 is the text
            before the first comma and the postcode the text after the last.
        city: City name stored with the record.
        suburb: Suburb name stored with the record.

    Returns:
        (property_data, history_data) on success, where property_data is a
        dict of scraped fields and history_data is the rental-history list
        produced by fetch_property_history; (None, None) when the page
        could not be fetched (non-200 response).
    """
    print(f"\nFetching details for {property_url}...")
    # Explicit timeout so one stalled server cannot hang the whole scrape.
    response = requests.get(property_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch details for: {property_url}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    def _int_field(tag, attrs):
        # Integer page attribute; None when missing or non-numeric.
        try:
            return int(soup.find(tag, attrs).get_text(strip=True))
        except (AttributeError, ValueError):
            return None

    def _text_field(css_class):
        # Text page attribute; 'N/A' when the element is absent.
        try:
            return soup.find('span', class_=css_class).get_text(strip=True)
        except AttributeError:
            return 'N/A'

    # Address line 1 and postcode come from the listing title; line 2 from the page.
    address_line1 = title.split(',')[0].strip()
    line2_tag = soup.find('span', {'testid': 'addressLine2'})
    address_line2 = line2_tag.get_text(strip=True) if line2_tag else 'N/A'
    postcode = title.split(',')[-1].strip()  # text after the last comma
    # Combine address_line1 and address_line2 into a single address field.
    address = f"{address_line1}, {address_line2}"

    year_built = _int_field('div', {'testid': 'yearBuiltValue'})
    bedrooms = _int_field('span', {'testid': 'bed'})
    bathrooms = _int_field('span', {'testid': 'bath'})
    car_spaces = _int_field('span', {'testid': 'car'})
    floor_size = _text_field('floor PropertyAttributes_attribute__3bkWm')
    land_area = _text_field('land PropertyAttributes_attribute__3bkWm')

    last_sold_price, last_sold_date = parse_sold_details(soup)
    capital_value = extract_value(soup, 'Capital Value')
    land_value = extract_value(soup, 'Land Value')
    improvement_value = extract_value(soup, 'Improvement Value')

    # Fetch rental history from property_history.py.
    rental_history = fetch_property_history(soup)

    # Prepare property data for insertion into Supabase.
    property_data = {
        'property_url': property_url,
        'address': address,
        'suburb': suburb,
        'city': city,
        'postcode': postcode,
        'year_built': year_built,
        'bedrooms': bedrooms,
        'bathrooms': bathrooms,
        'car_spaces': car_spaces,
        'floor_size': floor_size,
        'land_area': land_area,
        'last_sold_price': last_sold_price,
        'last_sold_date': last_sold_date,
        'capital_value': capital_value,
        'land_value': land_value,
        'improvement_value': improvement_value,
        'has_rental_history': rental_history['has_rental_history'],
        'is_currently_rented': rental_history['is_currently_rented'],
    }
    # Prepare history data for insertion into Supabase.
    history_data = rental_history['history']
    return property_data, history_data
# Step 3: Parse sold details
def parse_sold_details(soup):
    """Extract the last-sold price and date from a property page.

    Expects text shaped like "Last Sold on <date> for $<price>" inside the
    <strong testid="lastSoldAttribute"> element.

    Args:
        soup: parsed page exposing .find() (BeautifulSoup or equivalent).

    Returns:
        (price, date): price as a float (currency symbol/commas stripped)
        or None when unparseable; date as the raw date string. (None, None)
        when the element is missing or its text lacks the expected markers
        — guard clauses make this fallback explicit instead of risking
        unbound locals on malformed text.
    """
    last_sold = soup.find('strong', {'testid': 'lastSoldAttribute'})
    if not last_sold:
        return None, None

    last_sold_text = last_sold.get_text(strip=True)
    if 'for' not in last_sold_text or 'on' not in last_sold_text:
        return None, None

    # Price is the text after the final 'for'; strip '$' and ',' then parse.
    raw_price = last_sold_text.split('for')[-1].strip()
    raw_price = raw_price.replace('$', '').replace(',', '')
    try:
        last_sold_price = float(raw_price)
    except ValueError:
        last_sold_price = None  # e.g. "price withheld" — keep the date anyway

    # Date sits between the 'Last Sold on' prefix and the 'for' marker.
    if 'Last Sold on' in last_sold_text:
        last_sold_date = last_sold_text.replace('Last Sold on', '').split('for')[0].strip()
    else:
        last_sold_date = last_sold_text.split('on')[-1].strip()

    return last_sold_price, last_sold_date
# Helper to extract values like Capital Value, Land Value, Improvement Value
def extract_value(soup, value_type):
    """Return the value displayed next to a labelled figure on the page.

    Finds the <div> whose text equals *value_type* (e.g. 'Capital Value',
    'Land Value', 'Improvement Value') and returns the stripped text of its
    next sibling <div>, or 'N/A' when either element is missing.
    """
    label = soup.find('div', string=value_type)
    if label is None:
        return 'N/A'
    sibling = label.find_next_sibling('div')
    if sibling is None:
        return 'N/A'
    return sibling.get_text(strip=True)