Spaces:

Krish-Upgrix
/

Redfin-app

Sleeping

App Files Community

Krish-Upgrix committed on Feb 19, 2025

Commit

4fd0f09

verified ·

1 Parent(s): 5bf814a

Update app.py

Browse files

Files changed (1) hide show

app.py +198 -87

app.py CHANGED Viewed

@@ -1,87 +1,198 @@
-import streamlit as st
-import pandas as pd
-import time
-from selenium import webdriver
-from selenium.webdriver.common.by import By
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.chrome.options import Options
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-from webdriver_manager.chrome import ChromeDriverManager
-def scrape_redfin(zipcode):
-    options = Options()
-    options.add_argument("--headless")
-    options.add_argument("--incognito")
-    options.add_argument("--disable-blink-features=AutomationControlled")
-    options.add_argument("start-maximized")
-    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
-    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
-    url = f"https://www.redfin.com/zipcode/{zipcode}"
-    driver.get(url)
-    try:
-        listings_container = WebDriverWait(driver, 60).until(
-            EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
-        )
-    except Exception as e:
-        st.error("Error: Listings did not load properly")
-        driver.quit()
-        return pd.DataFrame()
-    scroll_pause_time = 5
-    screen_height = driver.execute_script("return window.innerHeight;")
-    last_height = driver.execute_script("return document.body.scrollHeight")
-    while True:
-        driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
-        time.sleep(scroll_pause_time)
-        new_height = driver.execute_script("return document.body.scrollHeight")
-        if new_height == last_height:
-            break
-        last_height = new_height
-    houses = []
-    listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
-    for listing in listings:
-        try:
-            price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
-        except:
-            price = "N/A"
-        try:
-            address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
-        except:
-            address = "N/A"
-        try:
-            size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
-        except:
-            size = "N/A"
-        try:
-            link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
-        except:
-            link = "N/A"
-        houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
-    driver.quit()
-    return pd.DataFrame(houses)
-st.title("Redfin House Listings Scraper")
-zipcode = st.text_input("Enter ZIP code:")
-if st.button("Scrape Data"):
-    if zipcode:
-        with st.spinner("Scraping data, please wait..."):
-            df = scrape_redfin(zipcode)
-            if not df.empty:
-                st.success("Scraping complete! Here are the available houses:")
-                st.dataframe(df)
-            else:
-                st.warning("No houses found for the given ZIP code.")
-    else:
-        st.error("Please enter a valid ZIP code.")

+import streamlit as st
+import pandas as pd
+import time
+import shutil
+import chromedriver_autoinstaller
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.support.ui import WebDriverWait
+from selenium.webdriver.support import expected_conditions as EC
+def scrape_redfin(zipcode):
+    chromedriver_autoinstaller.install()  # Ensure the correct chromedriver version is installed
+    options = Options()
+    options.add_argument("--headless")  # Run in headless mode
+    options.add_argument("--no-sandbox")
+    options.add_argument("--disable-dev-shm-usage")
+    options.add_argument("--incognito")
+    options.add_argument("--disable-blink-features=AutomationControlled")
+    options.add_argument("start-maximized")
+    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
+    # Find the installed Chrome binary path
+    chrome_path = shutil.which("google-chrome") or shutil.which("chrome")
+    if chrome_path:
+        options.binary_location = chrome_path
+    driver = webdriver.Chrome(options=options)
+    url = f"https://www.redfin.com/zipcode/{zipcode}"
+    driver.get(url)
+    try:
+        listings_container = WebDriverWait(driver, 60).until(
+            EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
+        )
+    except Exception as e:
+        st.error("Error: Listings did not load properly")
+        driver.quit()
+        return pd.DataFrame()
+    scroll_pause_time = 5
+    screen_height = driver.execute_script("return window.innerHeight;")
+    last_height = driver.execute_script("return document.body.scrollHeight")
+    while True:
+        driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
+        time.sleep(scroll_pause_time)
+        new_height = driver.execute_script("return document.body.scrollHeight")
+        if new_height == last_height:
+            break
+        last_height = new_height
+    houses = []
+    listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
+    for listing in listings:
+        try:
+            price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
+        except:
+            price = "N/A"
+        try:
+            address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
+        except:
+            address = "N/A"
+        try:
+            size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
+        except:
+            size = "N/A"
+        try:
+            link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
+        except:
+            link = "N/A"
+        houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
+    driver.quit()
+    return pd.DataFrame(houses)
+st.title("Redfin House Listings Scraper")
+zipcode = st.text_input("Enter ZIP code:")
+if st.button("Scrape Data"):
+    if zipcode:
+        with st.spinner("Scraping data, please wait..."):
+            df = scrape_redfin(zipcode)
+            if not df.empty:
+                st.success("Scraping complete! Here are the available houses:")
+                st.dataframe(df)
+            else:
+                st.warning("No houses found for the given ZIP code.")
+    else:
+        st.error("Please enter a valid ZIP code.")
+## working best code ever
+# import streamlit as st
+# import pandas as pd
+# import time
+# from selenium import webdriver
+# from selenium.webdriver.common.by import By
+# from selenium.webdriver.chrome.service import Service
+# from selenium.webdriver.chrome.options import Options
+# from selenium.webdriver.support.ui import WebDriverWait
+# from selenium.webdriver.support import expected_conditions as EC
+# from webdriver_manager.chrome import ChromeDriverManager
+# def scrape_redfin(zipcode):
+#     options = Options()
+#     options.add_argument("--headless")
+#     options.add_argument("--incognito")
+#     options.add_argument("--disable-blink-features=AutomationControlled")
+#     options.add_argument("start-maximized")
+#     options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
+#     driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
+#     url = f"https://www.redfin.com/zipcode/{zipcode}"
+#     driver.get(url)
+#     try:
+#         listings_container = WebDriverWait(driver, 60).until(
+#             EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
+#         )
+#     except Exception as e:
+#         st.error("Error: Listings did not load properly")
+#         driver.quit()
+#         return pd.DataFrame()
+#     scroll_pause_time = 5
+#     screen_height = driver.execute_script("return window.innerHeight;")
+#     last_height = driver.execute_script("return document.body.scrollHeight")
+#     while True:
+#         driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
+#         time.sleep(scroll_pause_time)
+#         new_height = driver.execute_script("return document.body.scrollHeight")
+#         if new_height == last_height:
+#             break
+#         last_height = new_height
+#     houses = []
+#     listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
+#     for listing in listings:
+#         try:
+#             price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
+#         except:
+#             price = "N/A"
+#         try:
+#             address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
+#         except:
+#             address = "N/A"
+#         try:
+#             size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
+#         except:
+#             size = "N/A"
+#         try:
+#             link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
+#         except:
+#             link = "N/A"
+#         houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
+#     driver.quit()
+#     return pd.DataFrame(houses)
+# st.title("Redfin House Listings Scraper")
+# zipcode = st.text_input("Enter ZIP code:")
+# if st.button("Scrape Data"):
+#     if zipcode:
+#         with st.spinner("Scraping data, please wait..."):
+#             df = scrape_redfin(zipcode)
+#             if not df.empty:
+#                 st.success("Scraping complete! Here are the available houses:")
+#                 st.dataframe(df)
+#             else:
+#                 st.warning("No houses found for the given ZIP code.")
+#     else:
+#         st.error("Please enter a valid ZIP code.")