# NOTE: the three lines below were Hugging Face Spaces page residue
# ("Spaces: Sleeping Sleeping") captured when this file was scraped;
# converted to a comment so the module parses.
import os
import subprocess
import time

import chromedriver_autoinstaller
import pandas as pd
import streamlit as st
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def install_chrome():
    """Install Chromium via apt-get if it is not already present.

    Intended for container-style hosts (e.g. a Hugging Face Space) where
    the process runs with enough privilege for apt-get; with check=True a
    failed install raises subprocess.CalledProcessError instead of being
    silently ignored.

    Raises:
        subprocess.CalledProcessError: if apt-get update/install fails.
    """
    if not os.path.exists("/usr/bin/chromium-browser"):
        subprocess.run(["apt-get", "update"], check=True)
        subprocess.run(["apt-get", "install", "-y", "chromium-browser"], check=True)
    # Fix: the original appended "/usr/bin/" to PATH unconditionally, so every
    # call (Streamlit reruns this on each button press) grew PATH with a
    # duplicate entry. Append only when /usr/bin is not already on PATH.
    path_entries = os.environ["PATH"].split(os.pathsep)
    if "/usr/bin" not in path_entries and "/usr/bin/" not in path_entries:
        os.environ["PATH"] += os.pathsep + "/usr/bin/"
def _scroll_to_bottom(driver, pause=5):
    """Scroll the page in half-screen steps until its height stops growing.

    Redfin lazy-loads listings, so we keep scrolling (sleeping *pause*
    seconds between steps) until document.body.scrollHeight is stable.
    """
    screen_height = driver.execute_script("return window.innerHeight;")
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def _element_text(parent, xpath):
    """Return the text of a child element located by *xpath*, or "N/A" if absent."""
    try:
        return parent.find_element(By.XPATH, xpath).text
    except NoSuchElementException:
        return "N/A"


def scrape_redfin(zipcode):
    """Scrape Redfin house listings for a ZIP code into a DataFrame.

    Args:
        zipcode: ZIP code string interpolated into the Redfin search URL.

    Returns:
        pandas.DataFrame with columns Price, Address, Size, Link; empty if
        the listings container never appeared (an st.error is shown then).
    """
    install_chrome()  # Ensure Chrome/Chromium is installed
    # Fix: the original called chromedriver_autoinstaller.install() twice
    # (once here and once inside Service(...)); install once and reuse the
    # returned driver path.
    driver_path = chromedriver_autoinstaller.install()

    options = Options()
    options.add_argument("--headless")  # Run in headless mode
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--incognito")
    # Reduce the chance Redfin detects automation and blocks the session.
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
    options.binary_location = "/usr/bin/chromium-browser"  # Use Chromium

    driver = webdriver.Chrome(service=Service(driver_path), options=options)
    # Fix: the original leaked the browser process if anything raised after
    # the initial wait (scrolling, element lookup); try/finally guarantees
    # driver.quit() on every path.
    try:
        driver.get(f"https://www.redfin.com/zipcode/{zipcode}")
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located(
                    (By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div")
                )
            )
        except Exception:
            # Best-effort: any load failure (usually a timeout) yields an
            # empty result rather than crashing the Streamlit app.
            st.error("Error: Listings did not load properly")
            return pd.DataFrame()

        _scroll_to_bottom(driver)

        # NOTE(review): absolute XPaths are brittle against Redfin layout
        # changes — confirm they still match before relying on this.
        listings = driver.find_elements(
            By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div"
        )
        houses = []
        for listing in listings:
            try:
                link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
            except NoSuchElementException:
                link = "N/A"
            houses.append({
                "Price": _element_text(listing, ".//div/div/div[2]/div[1]/div[1]/span"),
                "Address": _element_text(listing, ".//div/div/div[2]/div[3]"),
                "Size": _element_text(listing, ".//div/div/div[2]/div[4]/div"),
                "Link": link,
            })
        return pd.DataFrame(houses)
    finally:
        driver.quit()
# Streamlit UI: collect a ZIP code and display scraped listings on demand.
st.title("Redfin House Listings Scraper")
zipcode = st.text_input("Enter ZIP code:")

if st.button("Scrape Data"):
    # Guard clause: reject an empty input before launching the browser.
    if not zipcode:
        st.error("Please enter a valid ZIP code.")
    else:
        with st.spinner("Scraping data, please wait..."):
            results = scrape_redfin(zipcode)
        if results.empty:
            st.warning("No houses found for the given ZIP code.")
        else:
            st.success("Scraping complete! Here are the available houses:")
            st.dataframe(results)
## Previous working version (uses webdriver_manager instead of
## chromedriver_autoinstaller); kept commented out below for reference.
| # import streamlit as st | |
| # import pandas as pd | |
| # import time | |
| # from selenium import webdriver | |
| # from selenium.webdriver.common.by import By | |
| # from selenium.webdriver.chrome.service import Service | |
| # from selenium.webdriver.chrome.options import Options | |
| # from selenium.webdriver.support.ui import WebDriverWait | |
| # from selenium.webdriver.support import expected_conditions as EC | |
| # from webdriver_manager.chrome import ChromeDriverManager | |
| # def scrape_redfin(zipcode): | |
| # options = Options() | |
| # options.add_argument("--headless") | |
| # options.add_argument("--incognito") | |
| # options.add_argument("--disable-blink-features=AutomationControlled") | |
| # options.add_argument("start-maximized") | |
| # options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") | |
| # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) | |
| # url = f"https://www.redfin.com/zipcode/{zipcode}" | |
| # driver.get(url) | |
| # try: | |
| # listings_container = WebDriverWait(driver, 60).until( | |
| # EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div")) | |
| # ) | |
| # except Exception as e: | |
| # st.error("Error: Listings did not load properly") | |
| # driver.quit() | |
| # return pd.DataFrame() | |
| # scroll_pause_time = 5 | |
| # screen_height = driver.execute_script("return window.innerHeight;") | |
| # last_height = driver.execute_script("return document.body.scrollHeight") | |
| # while True: | |
| # driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2) | |
| # time.sleep(scroll_pause_time) | |
| # new_height = driver.execute_script("return document.body.scrollHeight") | |
| # if new_height == last_height: | |
| # break | |
| # last_height = new_height | |
| # houses = [] | |
| # listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div") | |
| # for listing in listings: | |
| # try: | |
| # price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text | |
| # except: | |
| # price = "N/A" | |
| # try: | |
| # address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text | |
| # except: | |
| # address = "N/A" | |
| # try: | |
| # size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text | |
| # except: | |
| # size = "N/A" | |
| # try: | |
| # link = listing.find_element(By.TAG_NAME, "a").get_attribute("href") | |
| # except: | |
| # link = "N/A" | |
| # houses.append({"Price": price, "Address": address, "Size": size, "Link": link}) | |
| # driver.quit() | |
| # return pd.DataFrame(houses) | |
| # st.title("Redfin House Listings Scraper") | |
| # zipcode = st.text_input("Enter ZIP code:") | |
| # if st.button("Scrape Data"): | |
| # if zipcode: | |
| # with st.spinner("Scraping data, please wait..."): | |
| # df = scrape_redfin(zipcode) | |
| # if not df.empty: | |
| # st.success("Scraping complete! Here are the available houses:") | |
| # st.dataframe(df) | |
| # else: | |
| # st.warning("No houses found for the given ZIP code.") | |
| # else: | |
| # st.error("Please enter a valid ZIP code.") | |