Krish-Upgrix committed on
Commit
4fd0f09
·
verified ·
1 Parent(s): 5bf814a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -87
app.py CHANGED
@@ -1,87 +1,198 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import time
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.chrome.service import Service
7
- from selenium.webdriver.chrome.options import Options
8
- from selenium.webdriver.support.ui import WebDriverWait
9
- from selenium.webdriver.support import expected_conditions as EC
10
- from webdriver_manager.chrome import ChromeDriverManager
11
-
12
- def scrape_redfin(zipcode):
13
- options = Options()
14
- options.add_argument("--headless")
15
- options.add_argument("--incognito")
16
- options.add_argument("--disable-blink-features=AutomationControlled")
17
- options.add_argument("start-maximized")
18
- options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
19
-
20
- driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
21
- url = f"https://www.redfin.com/zipcode/{zipcode}"
22
- driver.get(url)
23
-
24
- try:
25
- listings_container = WebDriverWait(driver, 60).until(
26
- EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
27
- )
28
- except Exception as e:
29
- st.error("Error: Listings did not load properly")
30
- driver.quit()
31
- return pd.DataFrame()
32
-
33
- scroll_pause_time = 5
34
- screen_height = driver.execute_script("return window.innerHeight;")
35
- last_height = driver.execute_script("return document.body.scrollHeight")
36
-
37
- while True:
38
- driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
39
- time.sleep(scroll_pause_time)
40
- new_height = driver.execute_script("return document.body.scrollHeight")
41
- if new_height == last_height:
42
- break
43
- last_height = new_height
44
-
45
- houses = []
46
- listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
47
-
48
- for listing in listings:
49
- try:
50
- price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
51
- except:
52
- price = "N/A"
53
-
54
- try:
55
- address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
56
- except:
57
- address = "N/A"
58
-
59
- try:
60
- size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
61
- except:
62
- size = "N/A"
63
-
64
- try:
65
- link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
66
- except:
67
- link = "N/A"
68
-
69
- houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
70
-
71
- driver.quit()
72
- return pd.DataFrame(houses)
73
-
74
- st.title("Redfin House Listings Scraper")
75
- zipcode = st.text_input("Enter ZIP code:")
76
-
77
- if st.button("Scrape Data"):
78
- if zipcode:
79
- with st.spinner("Scraping data, please wait..."):
80
- df = scrape_redfin(zipcode)
81
- if not df.empty:
82
- st.success("Scraping complete! Here are the available houses:")
83
- st.dataframe(df)
84
- else:
85
- st.warning("No houses found for the given ZIP code.")
86
- else:
87
- st.error("Please enter a valid ZIP code.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import time
4
+ import shutil
5
+ import chromedriver_autoinstaller
6
+ from selenium import webdriver
7
+ from selenium.webdriver.common.by import By
8
+ from selenium.webdriver.chrome.service import Service
9
+ from selenium.webdriver.chrome.options import Options
10
+ from selenium.webdriver.support.ui import WebDriverWait
11
+ from selenium.webdriver.support import expected_conditions as EC
12
+
13
def scrape_redfin(zipcode):
    """Scrape Redfin house listings for a ZIP code.

    Launches a headless Chrome browser, loads the Redfin search page for
    *zipcode*, scrolls until the page height stops growing so lazily-loaded
    listing cards render, then extracts price/address/size/link from each
    card.

    Args:
        zipcode: ZIP code string used to build the Redfin search URL.

    Returns:
        pandas.DataFrame with columns Price, Address, Size, Link; empty
        DataFrame when the listings container never appears.
    """
    # Ensure a chromedriver matching the installed Chrome is available.
    chromedriver_autoinstaller.install()

    options = Options()
    options.add_argument("--headless")  # no visible browser window
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--incognito")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("start-maximized")
    options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

    # Point Selenium at the installed Chrome binary when it is not on the
    # default lookup path (common in container deployments).
    chrome_path = shutil.which("google-chrome") or shutil.which("chrome")
    if chrome_path:
        options.binary_location = chrome_path

    driver = webdriver.Chrome(options=options)
    try:
        driver.get(f"https://www.redfin.com/zipcode/{zipcode}")

        # NOTE(review): absolute XPaths are brittle — they break whenever
        # Redfin changes its markup. Prefer stable CSS/data attributes.
        try:
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
            )
        except Exception:
            st.error("Error: Listings did not load properly")
            return pd.DataFrame()

        # Scroll half a viewport at a time until the document height stops
        # growing, so lazily-loaded listing cards are rendered.
        scroll_pause_time = 5
        screen_height = driver.execute_script("return window.innerHeight;")
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
            time.sleep(scroll_pause_time)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        def _text(element, xpath):
            """Return the text at *xpath* under *element*, or "N/A" if absent."""
            try:
                return element.find_element(By.XPATH, xpath).text
            except Exception:
                return "N/A"

        houses = []
        listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
        for listing in listings:
            try:
                link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
            except Exception:
                link = "N/A"
            houses.append({
                "Price": _text(listing, ".//div/div/div[2]/div[1]/div[1]/span"),
                "Address": _text(listing, ".//div/div/div[2]/div[3]"),
                "Size": _text(listing, ".//div/div/div[2]/div[4]/div"),
                "Link": link,
            })
        return pd.DataFrame(houses)
    finally:
        # Always release the browser, even on unexpected errors mid-scrape
        # (the original leaked the Chrome process on any uncaught exception).
        driver.quit()
83
+
84
# Streamlit UI: prompt for a ZIP code and show the scraped listings.
st.title("Redfin House Listings Scraper")
zipcode = st.text_input("Enter ZIP code:").strip()

if st.button("Scrape Data"):
    # A US ZIP code is exactly five digits; the previous truthiness check
    # accepted any non-empty text despite the "valid ZIP code" error text.
    if zipcode.isdigit() and len(zipcode) == 5:
        with st.spinner("Scraping data, please wait..."):
            df = scrape_redfin(zipcode)
            if not df.empty:
                st.success("Scraping complete! Here are the available houses:")
                st.dataframe(df)
            else:
                st.warning("No houses found for the given ZIP code.")
    else:
        st.error("Please enter a valid ZIP code.")
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+ ## working best code ever
111
+
112
+ # import streamlit as st
113
+ # import pandas as pd
114
+ # import time
115
+ # from selenium import webdriver
116
+ # from selenium.webdriver.common.by import By
117
+ # from selenium.webdriver.chrome.service import Service
118
+ # from selenium.webdriver.chrome.options import Options
119
+ # from selenium.webdriver.support.ui import WebDriverWait
120
+ # from selenium.webdriver.support import expected_conditions as EC
121
+ # from webdriver_manager.chrome import ChromeDriverManager
122
+
123
+ # def scrape_redfin(zipcode):
124
+ # options = Options()
125
+ # options.add_argument("--headless")
126
+ # options.add_argument("--incognito")
127
+ # options.add_argument("--disable-blink-features=AutomationControlled")
128
+ # options.add_argument("start-maximized")
129
+ # options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
130
+
131
+ # driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
132
+ # url = f"https://www.redfin.com/zipcode/{zipcode}"
133
+ # driver.get(url)
134
+
135
+ # try:
136
+ # listings_container = WebDriverWait(driver, 60).until(
137
+ # EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div"))
138
+ # )
139
+ # except Exception as e:
140
+ # st.error("Error: Listings did not load properly")
141
+ # driver.quit()
142
+ # return pd.DataFrame()
143
+
144
+ # scroll_pause_time = 5
145
+ # screen_height = driver.execute_script("return window.innerHeight;")
146
+ # last_height = driver.execute_script("return document.body.scrollHeight")
147
+
148
+ # while True:
149
+ # driver.execute_script("window.scrollBy(0, arguments[0]);", screen_height // 2)
150
+ # time.sleep(scroll_pause_time)
151
+ # new_height = driver.execute_script("return document.body.scrollHeight")
152
+ # if new_height == last_height:
153
+ # break
154
+ # last_height = new_height
155
+
156
+ # houses = []
157
+ # listings = driver.find_elements(By.XPATH, "/html/body/div[1]/div[6]/div[1]/div[3]/div[1]/div[4]/div/div[1]/div/div")
158
+
159
+ # for listing in listings:
160
+ # try:
161
+ # price = listing.find_element(By.XPATH, ".//div/div/div[2]/div[1]/div[1]/span").text
162
+ # except:
163
+ # price = "N/A"
164
+
165
+ # try:
166
+ # address = listing.find_element(By.XPATH, ".//div/div/div[2]/div[3]").text
167
+ # except:
168
+ # address = "N/A"
169
+
170
+ # try:
171
+ # size = listing.find_element(By.XPATH, ".//div/div/div[2]/div[4]/div").text
172
+ # except:
173
+ # size = "N/A"
174
+
175
+ # try:
176
+ # link = listing.find_element(By.TAG_NAME, "a").get_attribute("href")
177
+ # except:
178
+ # link = "N/A"
179
+
180
+ # houses.append({"Price": price, "Address": address, "Size": size, "Link": link})
181
+
182
+ # driver.quit()
183
+ # return pd.DataFrame(houses)
184
+
185
+ # st.title("Redfin House Listings Scraper")
186
+ # zipcode = st.text_input("Enter ZIP code:")
187
+
188
+ # if st.button("Scrape Data"):
189
+ # if zipcode:
190
+ # with st.spinner("Scraping data, please wait..."):
191
+ # df = scrape_redfin(zipcode)
192
+ # if not df.empty:
193
+ # st.success("Scraping complete! Here are the available houses:")
194
+ # st.dataframe(df)
195
+ # else:
196
+ # st.warning("No houses found for the given ZIP code.")
197
+ # else:
198
+ # st.error("Please enter a valid ZIP code.")