Spaces:

dina1
/

web_scraping

Runtime error

dina1 committed on Dec 20, 2025

Commit

9b75509

verified ·

1 Parent(s): 24a2777

Update shoalhaven_da_scraper.py

Files changed (1) hide show

shoalhaven_da_scraper.py CHANGED Viewed

@@ -53,27 +53,27 @@ def scrape():
         # Step 2: DA Tracking
         page.click("text=DA Tracking")
-        time.sleep(4)
-        # Step 3: Advanced Search (robust ASP.NET tab click)
-        page.wait_for_selector("ul[role='tablist']")
-        page.locator("ul[role='tablist'] >> li").nth(1).click()
-        page.wait_for_timeout(2000)
-        # Step 4: Date range
         page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
         page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
         page.click("text=Search")
         time.sleep(4)
-        # Step 5: Show results
         page.click("text=Show")
         time.sleep(4)
-        # Step 6: Loop through result pages
         while True:
-            # Get all rows except header
-            rows = page.query_selector_all("table tr")[1:]
             for row in rows:
                 cols = row.query_selector_all("td")
@@ -125,7 +125,7 @@ def scrape():
                 records.append(record)
                 detail_page.close()
-            # Pagination: check if "Next" button exists
             next_btn = page.query_selector("text=Next")
             if next_btn and next_btn.is_enabled():
                 next_btn.click()
@@ -135,7 +135,7 @@ def scrape():
         browser.close()
-    # Step 7: Save CSV
     df = pd.DataFrame(records, columns=HEADERS)
     df.to_csv(OUTPUT_FILE, index=False)
     print(f"Saved {len(records)} records to {OUTPUT_FILE}")

         # Step 2: DA Tracking
         page.click("text=DA Tracking")
+        # Wait for the search panel to appear
+        page.wait_for_selector("div#ctl00_ContentPlaceHolder1_upSearchPanel", timeout=60000)
+        time.sleep(2)
+        # Step 3: Click Advanced Search tab (second tab)
+        page.locator("ul#ctl00_ContentPlaceHolder1_tabstrip li").nth(1).click()
+        time.sleep(2)
+        # Step 4: Enter Date Range
         page.fill("input[name='ctl00$ContentPlaceHolder1$txtFromDate']", "01/10/2025")
         page.fill("input[name='ctl00$ContentPlaceHolder1$txtToDate']", "31/10/2025")
         page.click("text=Search")
         time.sleep(4)
+        # Show results
         page.click("text=Show")
         time.sleep(4)
+        # Step 5: Loop through result pages
         while True:
+            rows = page.query_selector_all("table tr")[1:]  # Skip header
             for row in rows:
                 cols = row.query_selector_all("td")
                 records.append(record)
                 detail_page.close()
+            # Pagination: check if "Next" button exists and enabled
             next_btn = page.query_selector("text=Next")
             if next_btn and next_btn.is_enabled():
                 next_btn.click()
         browser.close()
+    # Step 6: Save CSV
     df = pd.DataFrame(records, columns=HEADERS)
     df.to_csv(OUTPUT_FILE, index=False)
     print(f"Saved {len(records)} records to {OUTPUT_FILE}")