Spaces:
Paused
Paused
| import requests | |
| from bs4 import BeautifulSoup | |
| from property_history import fetch_property_history | |
| from config.supabase_config import insert_property_and_history # Assuming this function exists | |
| # 固定的城市和 suburb | |
| # CITY = "Porirua City" | |
| # SUBURB = "Aotea" | |
| # Fetch property details | |
| def fetch_property_details(property_url, title, city, suburb): | |
| print(f"\nFetching details for {property_url}...") | |
| response = requests.get(property_url) | |
| if response.status_code == 200: | |
| soup = BeautifulSoup(response.content, 'html.parser') | |
| # Extracting property details | |
| address_line1 = title.split(',')[0].strip() # 从 title 中提取 address_line1 | |
| address_line2 = soup.find('span', {'testid': 'addressLine2'}).get_text(strip=True) if soup.find('span', {'testid': 'addressLine2'}) else 'N/A' | |
| postcode = title.split(',')[-1].strip() # 获取最后一个逗号后的邮政编码 | |
| # Combine address_line1 and address_line2 into a single address field | |
| address = f"{address_line1}, {address_line2}" | |
| # suburb = SUBURB | |
| # city = CITY | |
| try: | |
| year_built = int(soup.find('div', {'testid': 'yearBuiltValue'}).get_text(strip=True)) | |
| except (AttributeError, ValueError): | |
| year_built = None | |
| try: | |
| bedrooms = int(soup.find('span', {'testid': 'bed'}).get_text(strip=True)) | |
| except (AttributeError, ValueError): | |
| bedrooms = None | |
| try: | |
| bathrooms = int(soup.find('span', {'testid': 'bath'}).get_text(strip=True)) | |
| except (AttributeError, ValueError): | |
| bathrooms = None | |
| try: | |
| car_spaces = int(soup.find('span', {'testid': 'car'}).get_text(strip=True)) | |
| except (AttributeError, ValueError): | |
| car_spaces = None | |
| try: | |
| floor_size = soup.find('span', class_='floor PropertyAttributes_attribute__3bkWm').get_text(strip=True) | |
| except AttributeError: | |
| floor_size = 'N/A' | |
| try: | |
| land_area = soup.find('span', class_='land PropertyAttributes_attribute__3bkWm').get_text(strip=True) | |
| except AttributeError: | |
| land_area = 'N/A' | |
| last_sold_price, last_sold_date = parse_sold_details(soup) | |
| capital_value = extract_value(soup, 'Capital Value') | |
| land_value = extract_value(soup, 'Land Value') | |
| improvement_value = extract_value(soup, 'Improvement Value') | |
| # Fetch rental history from property_history.py | |
| rental_history = fetch_property_history(soup) | |
| # Prepare property data for insertion into Supabase | |
| property_data = { | |
| 'property_url': property_url, | |
| 'address': address, | |
| 'suburb': suburb, | |
| 'city': city, | |
| 'postcode': postcode, | |
| 'year_built': year_built, | |
| 'bedrooms': bedrooms, | |
| 'bathrooms': bathrooms, | |
| 'car_spaces': car_spaces, | |
| 'floor_size': floor_size, | |
| 'land_area': land_area, | |
| 'last_sold_price': last_sold_price, | |
| 'last_sold_date': last_sold_date, | |
| 'capital_value': capital_value, | |
| 'land_value': land_value, | |
| 'improvement_value': improvement_value, | |
| 'has_rental_history': rental_history['has_rental_history'], | |
| 'is_currently_rented': rental_history['is_currently_rented'] | |
| } | |
| # Prepare history data for insertion into Supabase | |
| history_data = rental_history['history'] | |
| return property_data, history_data # Return the data for insertion | |
| else: | |
| print(f"Failed to fetch details for: {property_url}") | |
| return None, None | |
| # Step 3: Parse sold details | |
| def parse_sold_details(soup): | |
| last_sold = soup.find('strong', {'testid': 'lastSoldAttribute'}) | |
| if last_sold: | |
| last_sold_text = last_sold.get_text(strip=True) | |
| if 'for' in last_sold_text and 'on' in last_sold_text: | |
| last_sold_price = last_sold_text.split('for')[-1].strip() | |
| # Clean up the last_sold_price: remove '$' and ',' and convert to a float | |
| last_sold_price = last_sold_price.replace('$', '').replace(',', '') | |
| try: | |
| last_sold_price = float(last_sold_price) | |
| except ValueError: | |
| last_sold_price = None # If price cannot be parsed, set it to None | |
| if 'Last Sold on' in last_sold_text: | |
| last_sold_date = last_sold_text.replace('Last Sold on', '').split('for')[0].strip() | |
| else: | |
| last_sold_date = last_sold_text.split('on')[-1].strip() | |
| return last_sold_price, last_sold_date | |
| return None, None | |
| # Helper to extract values like Capital Value, Land Value, Improvement Value | |
| def extract_value(soup, value_type): | |
| try: | |
| value = soup.find('div', string=value_type).find_next_sibling('div').get_text(strip=True) | |
| return value | |
| except AttributeError: | |
| return 'N/A' | |