import requests
from bs4 import BeautifulSoup
from property_history import fetch_property_history
from config.supabase_config import insert_property_and_history # Assuming this function exists
# City and suburb were previously hard-coded constants; they are now passed
# in as arguments to fetch_property_details.
# CITY = "Porirua City"
# SUBURB = "Aotea"
# Fetch property details
def fetch_property_details(property_url, title, city, suburb):
    """Fetch a property detail page and parse it into Supabase-ready records.

    Args:
        property_url: URL of the property detail page to scrape.
        title: Listing title (comma-separated); address line 1 is the text
            before the first comma and the postcode the text after the last.
        city: City name stored with the record.
        suburb: Suburb name stored with the record.

    Returns:
        (property_data, history_data) on success, where property_data is a
        dict of scraped fields and history_data is the rental-history list
        produced by fetch_property_history; (None, None) when the page
        could not be fetched (non-200 response).
    """
    print(f"\nFetching details for {property_url}...")
    # Explicit timeout so one stalled server cannot hang the whole scrape.
    response = requests.get(property_url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch details for: {property_url}")
        return None, None

    soup = BeautifulSoup(response.content, 'html.parser')

    def _int_field(tag, attrs):
        # Integer page attribute; None when missing or non-numeric.
        try:
            return int(soup.find(tag, attrs).get_text(strip=True))
        except (AttributeError, ValueError):
            return None

    def _text_field(css_class):
        # Text page attribute; 'N/A' when the element is absent.
        try:
            return soup.find('span', class_=css_class).get_text(strip=True)
        except AttributeError:
            return 'N/A'

    # Address line 1 and postcode come from the listing title; line 2 from the page.
    address_line1 = title.split(',')[0].strip()
    line2_tag = soup.find('span', {'testid': 'addressLine2'})
    address_line2 = line2_tag.get_text(strip=True) if line2_tag else 'N/A'
    postcode = title.split(',')[-1].strip()  # text after the last comma
    # Combine address_line1 and address_line2 into a single address field.
    address = f"{address_line1}, {address_line2}"

    year_built = _int_field('div', {'testid': 'yearBuiltValue'})
    bedrooms = _int_field('span', {'testid': 'bed'})
    bathrooms = _int_field('span', {'testid': 'bath'})
    car_spaces = _int_field('span', {'testid': 'car'})
    floor_size = _text_field('floor PropertyAttributes_attribute__3bkWm')
    land_area = _text_field('land PropertyAttributes_attribute__3bkWm')

    last_sold_price, last_sold_date = parse_sold_details(soup)
    capital_value = extract_value(soup, 'Capital Value')
    land_value = extract_value(soup, 'Land Value')
    improvement_value = extract_value(soup, 'Improvement Value')

    # Fetch rental history from property_history.py.
    rental_history = fetch_property_history(soup)

    # Prepare property data for insertion into Supabase.
    property_data = {
        'property_url': property_url,
        'address': address,
        'suburb': suburb,
        'city': city,
        'postcode': postcode,
        'year_built': year_built,
        'bedrooms': bedrooms,
        'bathrooms': bathrooms,
        'car_spaces': car_spaces,
        'floor_size': floor_size,
        'land_area': land_area,
        'last_sold_price': last_sold_price,
        'last_sold_date': last_sold_date,
        'capital_value': capital_value,
        'land_value': land_value,
        'improvement_value': improvement_value,
        'has_rental_history': rental_history['has_rental_history'],
        'is_currently_rented': rental_history['is_currently_rented'],
    }
    # Prepare history data for insertion into Supabase.
    history_data = rental_history['history']
    return property_data, history_data
# Step 3: Parse sold details
def parse_sold_details(soup):
    """Extract the last-sold price and date from a property page.

    Expects text shaped like "Last Sold on <date> for $<price>" inside the
    <strong testid="lastSoldAttribute"> element.

    Args:
        soup: parsed page exposing .find() (BeautifulSoup or equivalent).

    Returns:
        (price, date): price as a float (currency symbol/commas stripped)
        or None when unparseable; date as the raw date string. (None, None)
        when the element is missing or its text lacks the expected markers
        — guard clauses make this fallback explicit instead of risking
        unbound locals on malformed text.
    """
    last_sold = soup.find('strong', {'testid': 'lastSoldAttribute'})
    if not last_sold:
        return None, None

    last_sold_text = last_sold.get_text(strip=True)
    if 'for' not in last_sold_text or 'on' not in last_sold_text:
        return None, None

    # Price is the text after the final 'for'; strip '$' and ',' then parse.
    raw_price = last_sold_text.split('for')[-1].strip()
    raw_price = raw_price.replace('$', '').replace(',', '')
    try:
        last_sold_price = float(raw_price)
    except ValueError:
        last_sold_price = None  # e.g. "price withheld" — keep the date anyway

    # Date sits between the 'Last Sold on' prefix and the 'for' marker.
    if 'Last Sold on' in last_sold_text:
        last_sold_date = last_sold_text.replace('Last Sold on', '').split('for')[0].strip()
    else:
        last_sold_date = last_sold_text.split('on')[-1].strip()

    return last_sold_price, last_sold_date
# Helper to extract values like Capital Value, Land Value, Improvement Value
def extract_value(soup, value_type):
    """Return the value displayed next to a labelled figure on the page.

    Finds the <div> whose text equals *value_type* (e.g. 'Capital Value',
    'Land Value', 'Improvement Value') and returns the stripped text of its
    next sibling <div>, or 'N/A' when either element is missing.
    """
    label = soup.find('div', string=value_type)
    if label is None:
        return 'N/A'
    sibling = label.find_next_sibling('div')
    if sibling is None:
        return 'N/A'
    return sibling.get_text(strip=True)