Spaces:
Sleeping
Sleeping
| /** | |
| * =================================================================================== | |
| * === Google Maps - Fully Automated Georgia Gas Station Scraper ===================== | |
| * =================================================================================== | |
| * | |
| * WARNING: THIS SCRIPT VIOLATES GOOGLE'S TERMS OF SERVICE. USE AT YOUR OWN RISK. | |
| * IT IS FOR EDUCATIONAL PURPOSES ONLY. | |
| * | |
| * This script will automatically: | |
| * 1. Loop through a list of all 159 counties in Georgia. | |
| * 2. Skip any counties that have already been scraped and saved. | |
| * 3. Type the search query into the Google Maps search bar. | |
| * 4. Click the search button. | |
| * 5. Wait for results to load. | |
| * 6. Scrape all business details on the current page. | |
| * 7. Scroll and click the "Next page" button until all pages are scraped. | |
| * 8. Save the collected data for the county into localStorage. | |
| * 9. Move to the next county. | |
| * | |
| * =================================================================================== | |
| */ | |
| (async () => { | |
| // --- Configuration & Data --- | |
// County names used to build one search query per county, grouped here by
// initial letter for readability (159 entries in total).
const counties = [
  // A
  "Appling", "Atkinson",
  // B
  "Bacon", "Baker", "Baldwin", "Banks", "Barrow", "Bartow", "Ben Hill", "Berrien",
  "Bibb", "Bleckley", "Brantley", "Brooks", "Bryan", "Bulloch", "Burke", "Butts",
  // C
  "Calhoun", "Camden", "Candler", "Carroll", "Catoosa", "Charlton", "Chatham",
  "Chattahoochee", "Chattooga", "Cherokee", "Clarke", "Clay", "Clayton", "Clinch",
  "Cobb", "Coffee", "Colquitt", "Columbia", "Cook", "Coweta", "Crawford", "Crisp",
  // D
  "Dade", "Dawson", "Decatur", "DeKalb", "Dodge", "Dooly", "Dougherty", "Douglas",
  // E
  "Early", "Echols", "Effingham", "Elbert", "Emanuel", "Evans",
  // F
  "Fannin", "Fayette", "Floyd", "Forsyth", "Franklin", "Fulton",
  // G
  "Gilmer", "Glascock", "Glynn", "Gordon", "Grady", "Greene", "Gwinnett",
  // H
  "Habersham", "Hall", "Hancock", "Haralson", "Harris", "Hart", "Heard", "Henry",
  "Houston",
  // I
  "Irwin",
  // J
  "Jackson", "Jasper", "Jeff Davis", "Jefferson", "Jenkins", "Johnson", "Jones",
  // L
  "Lamar", "Lanier", "Laurens", "Lee", "Liberty", "Lincoln", "Long", "Lowndes",
  "Lumpkin",
  // M
  "Macon", "Madison", "Marion", "McDuffie", "McIntosh", "Meriwether", "Miller",
  "Mitchell", "Monroe", "Montgomery", "Morgan", "Murray", "Muscogee",
  // N
  "Newton",
  // O
  "Oconee", "Oglethorpe",
  // P
  "Paulding", "Peach", "Pickens", "Pierce", "Pike", "Polk", "Pulaski", "Putnam",
  // Q
  "Quitman",
  // R
  "Rabun", "Randolph", "Richmond", "Rockdale",
  // S
  "Schley", "Screven", "Seminole", "Spalding", "Stephens", "Stewart", "Sumter",
  // T
  "Talbot", "Taliaferro", "Tattnall", "Taylor", "Telfair", "Terrell", "Thomas",
  "Tift", "Toombs", "Towns", "Treutlen", "Troup", "Turner", "Twiggs",
  // U
  "Union", "Upson",
  // W
  "Walker", "Walton", "Ware", "Warren", "Washington", "Wayne", "Webster",
  "Wheeler", "White", "Whitfield", "Wilcox", "Wilkes", "Wilkinson", "Worth",
];

// localStorage key under which the per-county results object is persisted.
const LOCAL_STORAGE_KEY = "gaGasStationsDB";
| // --- Helper Functions --- | |
/**
 * Pause for a given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<void>} Promise that fulfills once the timer fires.
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
/**
 * Pick a pseudo-random whole number between `min` and `max`, both inclusive
 * (for integer bounds).
 *
 * @param {number} [min=1000] - Lower bound.
 * @param {number} [max=3000] - Upper bound.
 * @returns {number} Value produced by flooring a Math.random()-scaled offset.
 */
const randomDelay = (min = 1000, max = 3000) => {
  const span = max - min + 1;
  return Math.floor(Math.random() * span + min);
};
/**
 * Poll the document until an element matching `selector` exists.
 *
 * The first lookup happens immediately, then again every 100 ms. The timeout
 * is measured against the wall clock (Date.now()), so it does not stretch when
 * timer callbacks are delayed.
 *
 * @param {string} selector - CSS selector handed to document.querySelector.
 * @param {number} [timeout=20000] - Maximum time to keep polling, in ms.
 * @returns {Promise<Element>} Fulfills with the first matching element.
 *   Rejects with Error("Timeout waiting for selector: …") when the deadline
 *   passes, or with whatever document.querySelector throws (e.g. for an
 *   invalid selector).
 */
const waitFor = (selector, timeout = 20000) =>
  new Promise((resolve, reject) => {
    const pollIntervalMs = 100;
    const deadline = Date.now() + timeout;

    const poll = () => {
      let el;
      try {
        el = document.querySelector(selector);
      } catch (error) {
        // A selector that throws will throw on every poll; fail fast instead
        // of leaving the promise pending forever.
        reject(error);
        return;
      }

      if (el) {
        resolve(el);
        return;
      }

      if (Date.now() >= deadline) {
        reject(new Error(`Timeout waiting for selector: ${selector}`));
        return;
      }

      // Re-arm only while still waiting, so no timer outlives the promise.
      setTimeout(poll, pollIntervalMs);
    };

    poll();
  });
/**
 * Set an input's value in a way that framework-managed inputs (e.g. React)
 * notice, then announce the change.
 *
 * Such frameworks may install their own `value` accessor on the element
 * instance; a plain `element.value = x` goes through that accessor. To bypass
 * it, the setter is taken from the element's prototype chain and invoked
 * directly. The whole chain is searched so that elements whose class extends
 * the one declaring `value` are handled too.
 *
 * Setter preference: nearest prototype setter, then the element's own setter,
 * then plain assignment.
 *
 * @param {EventTarget & { value?: unknown }} element - Target input-like node.
 * @param {string} value - Value to write.
 * @returns {void}
 */
function setNativeValue(element, value) {
  const ownSetter = Object.getOwnPropertyDescriptor(element, "value")?.set;

  let inheritedSetter;
  for (
    let proto = Object.getPrototypeOf(element);
    proto !== null;
    proto = Object.getPrototypeOf(proto)
  ) {
    const setter = Object.getOwnPropertyDescriptor(proto, "value")?.set;
    if (setter) {
      inheritedSetter = setter;
      break;
    }
  }

  if (inheritedSetter) {
    inheritedSetter.call(element, value);
  } else if (ownSetter) {
    ownSetter.call(element, value);
  } else {
    element.value = value;
  }

  // Bubbling events so listeners attached higher in the tree also see them.
  element.dispatchEvent(new Event("input", { bubbles: true }));
  element.dispatchEvent(new Event("change", { bubbles: true }));
}
| // --- Main Logic --- | |
| let db = JSON.parse(localStorage.getItem(LOCAL_STORAGE_KEY) || "{}"); | |
| const countiesToDo = counties.filter((c) => !db[c]); | |
| if (countiesToDo.length === 0) { | |
| console.log("π All counties have already been scraped! Data is in localStorage."); | |
| console.log('To view it, run: JSON.parse(localStorage.getItem("' + LOCAL_STORAGE_KEY + '"))'); | |
| console.log('To restart, run: localStorage.removeItem("' + LOCAL_STORAGE_KEY + '")'); | |
| return; | |
| } | |
| console.log(`π Starting scraper. ${counties.length} total counties, ${countiesToDo.length} remaining.`); | |
| await sleep(2000); | |
| for (const county of countiesToDo) { | |
| console.log(`====================================================`); | |
| console.log(`π Now processing: ${county} County`); | |
| try { | |
| // 1. Perform Search | |
| const searchInput = await waitFor("input#searchboxinput"); | |
| const searchButton = await waitFor("button#searchbox-searchbutton"); | |
| const searchQuery = `gas station in ${county} County, Georgia, USA`; | |
| console.log(` - Typing search query: "${searchQuery}"`); | |
| setNativeValue(searchInput, searchQuery); | |
| await sleep(randomDelay(500, 1000)); | |
| console.log(` - Clicking search button...`); | |
| searchButton.click(); | |
| // 2. Wait for results panel and scrape | |
| const resultsPanelSelector = `div[aria-label*="Results for gas station in ${county} County"]`; | |
| const panel = await waitFor(resultsPanelSelector); | |
| console.log(` - Results panel loaded.`); | |
| await sleep(randomDelay(3000, 5000)); // Wait for results to populate | |
| const stations = []; | |
| let page = 1; | |
| let LastLength = 0; | |
| while (true) { | |
| console.log(` - Scraping page ${page}...`); | |
| // Scroll to bottom to ensure all results on the page are rendered | |
| panel.scrollTop = panel.scrollHeight; | |
| await sleep(randomDelay(2000, 3500)); | |
| panel.scrollTop = panel.scrollHeight; // scroll again to be sure | |
| await sleep(1000); | |
| // This selector targets the link that wraps each search result item | |
| const resultItems = document.querySelectorAll('a[href*="https://www.google.com/maps/place/"]'); | |
| console.log(` - Found ${resultItems.length} result items.`); | |
| if (LastLength === resultItems.length) { | |
| console.log(` - No new results found.`); | |
| break; | |
| } | |
| LastLength = resultItems.length; | |
| resultItems.forEach((item) => { | |
| // Check if this item is actually a result in our list, not a map pin link | |
| if (!item.closest(resultsPanelSelector)) return; | |
| const detailsContainer = item.nextElementSibling.nextElementSibling; // Details are often in a sibling div | |
| if (!stations.some(s => s.url === item.href)) { | |
| stations.push({ html: detailsContainer.innerHTML, url: item.href }); | |
| } | |
| }); | |
| const nextButton = document.querySelector('button[aria-label="Next page"]'); | |
| if (nextButton && !nextButton.disabled) { | |
| console.log(` - Found ${stations.length} stations so far. Clicking next page...`); | |
| nextButton.click(); | |
| page++; | |
| await sleep(randomDelay(4000, 6000)); // Crucial delay for next page to load | |
| } else { | |
| console.log(` - No more pages found.`); | |
| break; | |
| } | |
| } | |
| // 3. Save Data | |
| db[county] = stations; | |
| localStorage.setItem(LOCAL_STORAGE_KEY, JSON.stringify(db)); | |
| console.log(`β SUCCESS: Saved ${stations.length} stations for ${county} County.`); | |
| } catch (error) { | |
| console.error(`β ERROR processing ${county} County:`, error.message); | |
| console.log(` - Skipping this county for now. It will be retried if you run the script again.`); | |
| } | |
| const nextCountyDelay = randomDelay(5000, 10000); | |
| console.log(` - Waiting for ${Math.round(nextCountyDelay / 1000)} seconds before next county...`); | |
| await sleep(nextCountyDelay); | |
| } | |
| console.log("πππ All counties finished! πππ"); | |
| console.log("To view the final data, run this in the console:"); | |
| console.log(`copy(localStorage.getItem('${LOCAL_STORAGE_KEY}'))`); | |
| console.log('Then paste the data from your clipboard into a text file and save it as "georgia_gas_stations.json".'); | |
| })(); | |