dina1 committed on
Commit
9b75509
·
verified ·
1 Parent(s): 24a2777

Update shoalhaven_da_scraper.py

Browse files
Files changed (1) hide show
  1. shoalhaven_da_scraper.py +12 -12
shoalhaven_da_scraper.py CHANGED
@@ -53,27 +53,27 @@ def scrape():
53
 
54
  # Step 2: DA Tracking
55
  page.click("text=DA Tracking")
56
- time.sleep(4)
 
 
57
 
58
- # Step 3: Advanced Search (robust ASP.NET tab click)
59
- page.wait_for_selector("ul[role='tablist']")
60
- page.locator("ul[role='tablist'] >> li").nth(1).click()
61
- page.wait_for_timeout(2000)
62
 
63
- # Step 4: Date range
64
  page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
65
  page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
66
  page.click("text=Search")
67
  time.sleep(4)
68
 
69
- # Step 5: Show results
70
  page.click("text=Show")
71
  time.sleep(4)
72
 
73
- # Step 6: Loop through result pages
74
  while True:
75
- # Get all rows except header
76
- rows = page.query_selector_all("table tr")[1:]
77
 
78
  for row in rows:
79
  cols = row.query_selector_all("td")
@@ -125,7 +125,7 @@ def scrape():
125
  records.append(record)
126
  detail_page.close()
127
 
128
- # Pagination: check if "Next" button exists
129
  next_btn = page.query_selector("text=Next")
130
  if next_btn and next_btn.is_enabled():
131
  next_btn.click()
@@ -135,7 +135,7 @@ def scrape():
135
 
136
  browser.close()
137
 
138
- # Step 7: Save CSV
139
  df = pd.DataFrame(records, columns=HEADERS)
140
  df.to_csv(OUTPUT_FILE, index=False)
141
  print(f"Saved {len(records)} records to {OUTPUT_FILE}")
 
53
 
54
  # Step 2: DA Tracking
55
  page.click("text=DA Tracking")
56
+ # Wait for the search panel to appear
57
+ page.wait_for_selector("div#ctl00_ContentPlaceHolder1_upSearchPanel", timeout=60000)
58
+ time.sleep(2)
59
 
60
+ # Step 3: Click Advanced Search tab (second tab)
61
+ page.locator("ul#ctl00_ContentPlaceHolder1_tabstrip li").nth(1).click()
62
+ time.sleep(2)
 
63
 
64
+ # Step 4: Enter Date Range
65
  page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
66
  page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
67
  page.click("text=Search")
68
  time.sleep(4)
69
 
70
+ # Show results
71
  page.click("text=Show")
72
  time.sleep(4)
73
 
74
+ # Step 5: Loop through result pages
75
  while True:
76
+ rows = page.query_selector_all("table tr")[1:] # Skip header
 
77
 
78
  for row in rows:
79
  cols = row.query_selector_all("td")
 
125
  records.append(record)
126
  detail_page.close()
127
 
128
+ # Pagination: check if "Next" button exists and enabled
129
  next_btn = page.query_selector("text=Next")
130
  if next_btn and next_btn.is_enabled():
131
  next_btn.click()
 
135
 
136
  browser.close()
137
 
138
+ # Step 6: Save CSV
139
  df = pd.DataFrame(records, columns=HEADERS)
140
  df.to_csv(OUTPUT_FILE, index=False)
141
  print(f"Saved {len(records)} records to {OUTPUT_FILE}")