Spaces:
Runtime error
Runtime error
Update shoalhaven_da_scraper.py
Browse files
- shoalhaven_da_scraper.py +12 -12
shoalhaven_da_scraper.py
CHANGED
|
@@ -53,27 +53,27 @@ def scrape():
|
|
| 53 |
|
| 54 |
# Step 2: DA Tracking
|
| 55 |
page.click("text=DA Tracking")
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
# Step 3: Advanced Search (
|
| 59 |
-
page.
|
| 60 |
-
|
| 61 |
-
page.wait_for_timeout(2000)
|
| 62 |
|
| 63 |
-
# Step 4: Date
|
| 64 |
page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
|
| 65 |
page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
|
| 66 |
page.click("text=Search")
|
| 67 |
time.sleep(4)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
page.click("text=Show")
|
| 71 |
time.sleep(4)
|
| 72 |
|
| 73 |
-
# Step
|
| 74 |
while True:
|
| 75 |
-
|
| 76 |
-
rows = page.query_selector_all("table tr")[1:]
|
| 77 |
|
| 78 |
for row in rows:
|
| 79 |
cols = row.query_selector_all("td")
|
|
@@ -125,7 +125,7 @@ def scrape():
|
|
| 125 |
records.append(record)
|
| 126 |
detail_page.close()
|
| 127 |
|
| 128 |
-
# Pagination: check if "Next" button exists
|
| 129 |
next_btn = page.query_selector("text=Next")
|
| 130 |
if next_btn and next_btn.is_enabled():
|
| 131 |
next_btn.click()
|
|
@@ -135,7 +135,7 @@ def scrape():
|
|
| 135 |
|
| 136 |
browser.close()
|
| 137 |
|
| 138 |
-
# Step
|
| 139 |
df = pd.DataFrame(records, columns=HEADERS)
|
| 140 |
df.to_csv(OUTPUT_FILE, index=False)
|
| 141 |
print(f"Saved {len(records)} records to {OUTPUT_FILE}")
|
|
|
|
| 53 |
|
| 54 |
# Step 2: DA Tracking
|
| 55 |
page.click("text=DA Tracking")
|
| 56 |
+
# Wait for the search panel to appear
|
| 57 |
+
page.wait_for_selector("div#ctl00_ContentPlaceHolder1_upSearchPanel", timeout=60000)
|
| 58 |
+
time.sleep(2)
|
| 59 |
|
| 60 |
+
# Step 3: Click Advanced Search tab (second tab)
|
| 61 |
+
page.locator("ul#ctl00_ContentPlaceHolder1_tabstrip li").nth(1).click()
|
| 62 |
+
time.sleep(2)
|
|
|
|
| 63 |
|
| 64 |
+
# Step 4: Enter Date Range
|
| 65 |
page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
|
| 66 |
page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
|
| 67 |
page.click("text=Search")
|
| 68 |
time.sleep(4)
|
| 69 |
|
| 70 |
+
# Show results
|
| 71 |
page.click("text=Show")
|
| 72 |
time.sleep(4)
|
| 73 |
|
| 74 |
+
# Step 5: Loop through result pages
|
| 75 |
while True:
|
| 76 |
+
rows = page.query_selector_all("table tr")[1:] # Skip header
|
|
|
|
| 77 |
|
| 78 |
for row in rows:
|
| 79 |
cols = row.query_selector_all("td")
|
|
|
|
| 125 |
records.append(record)
|
| 126 |
detail_page.close()
|
| 127 |
|
| 128 |
+
# Pagination: check if "Next" button exists and enabled
|
| 129 |
next_btn = page.query_selector("text=Next")
|
| 130 |
if next_btn and next_btn.is_enabled():
|
| 131 |
next_btn.click()
|
|
|
|
| 135 |
|
| 136 |
browser.close()
|
| 137 |
|
| 138 |
+
# Step 6: Save CSV
|
| 139 |
df = pd.DataFrame(records, columns=HEADERS)
|
| 140 |
df.to_csv(OUTPUT_FILE, index=False)
|
| 141 |
print(f"Saved {len(records)} records to {OUTPUT_FILE}")
|