Krish-Upgrix committed on
Commit
547da0a
·
verified ·
1 Parent(s): 8c1fc76

Upload 2 files

Browse files
Files changed (2) hide show
  1. redfin_app.py +87 -0
  2. requirements.txt +7 -0
redfin_app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ from selenium import webdriver
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.chrome.service import Service
7
+ from selenium.webdriver.chrome.options import Options
8
+ from selenium.webdriver.support.ui import WebDriverWait
9
+ from selenium.webdriver.support import expected_conditions as EC
10
+ from webdriver_manager.chrome import ChromeDriverManager
11
+
12
def scrape_redfin(zipcode):
    """Scrape the Redfin listing page for a ZIP code into a DataFrame.

    Starts a headless Chrome session, opens
    ``https://www.redfin.com/zipcode/<zipcode>``, waits up to 60 seconds for
    the listings container to be present, scrolls down in half-viewport
    steps until the document height stops changing, and then reads price,
    address, size and link from every listing card found.

    Args:
        zipcode: ZIP code to look up. It is converted to ``str`` and
            surrounding whitespace is removed before the URL is built.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Price``, ``Address``,
        ``Size`` and ``Link``, one row per listing card. A field that cannot
        be read is reported as ``"N/A"``. An empty DataFrame is returned
        (after showing a Streamlit error) when the listings container does
        not appear in time.
    """
    # Imported here so the function stays self-contained and does not rely
    # on a change to the module-level import block.
    from selenium.common.exceptions import WebDriverException

    def _text_or_na(card, xpath):
        """Return the text of the element at *xpath* under *card*, or "N/A"."""
        try:
            return card.find_element(By.XPATH, xpath).text
        except WebDriverException:
            # Best-effort: a missing or stale element must not abort the scrape.
            return "N/A"

    # Absolute XPath of the container whose direct <div> children are the
    # individual listing cards.
    listings_xpath = "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    try:
        url = f"https://www.redfin.com/zipcode/{str(zipcode).strip()}"
        driver.get(url)

        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.XPATH, listings_xpath))
            )
        except Exception:
            # Deliberately broad: any failure while waiting is reported to
            # the user as "listings did not load" instead of crashing the app.
            st.error("Error: Listings did not load properly")
            return pd.DataFrame()

        scroll_pause_time = 5
        screen_height = driver.execute_script("return window.innerHeight;")
        last_height = driver.execute_script("return document.body.scrollHeight")

        # NOTE(review): the loop stops as soon as the document height is
        # unchanged after one half-viewport scroll, which may happen before
        # the bottom of the page is reached -- confirm this is intended.
        while True:
            driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
            time.sleep(scroll_pause_time)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        houses = []
        listings = driver.find_elements(By.XPATH, f"{listings_xpath}/div")

        for listing in listings:
            price = _text_or_na(listing, ".//div/div/div[2]/div[1]/div[1]/span")
            address = _text_or_na(listing, ".//div/div/div[2]/div[3]")
            size = _text_or_na(listing, ".//div/div/div[2]/div[4]/div")

            try:
                link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
            except WebDriverException:
                link = "N/A"

            houses.append({"Price": price, "Address": address, "Size": size, "Link": link})

        return pd.DataFrame(houses)
    finally:
        # Always shut the browser down, including when navigation, scrolling
        # or element lookup raises, so no Chrome process is left behind.
        driver.quit()
73
+
74
# --- Streamlit page: ask for a ZIP code and show the scraped listings ---
st.title("Redfin House Listings Scraper")
zipcode = st.text_input("Enter ZIP code:")

if st.button("Scrape Data"):
    # Strip first so whitespace-only input is rejected here instead of
    # starting a browser session for an empty ZIP code.
    cleaned_zipcode = zipcode.strip() if zipcode else ""
    if cleaned_zipcode:
        # The spinner covers only the (slow) scrape; results render after it.
        with st.spinner("Scraping data, please wait..."):
            df = scrape_redfin(cleaned_zipcode)
        if not df.empty:
            st.success("Scraping complete! Here are the available houses:")
            st.dataframe(df)
        else:
            st.warning("No houses found for the given ZIP code.")
    else:
        st.error("Please enter a valid ZIP code.")
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ selenium
2
+ pandas
3
+ webdriver-manager
4
+ undetected-chromedriver
5
+ setuptools
6
+ selenium-stealth
7
+ streamlit