Spaces:
Sleeping
Sleeping
File size: 8,461 Bytes
c460629 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
/**
* ===================================================================================
* === Google Maps - Fully Automated Georgia Gas Station Scraper =====================
* ===================================================================================
*
* WARNING: THIS SCRIPT VIOLATES GOOGLE'S TERMS OF SERVICE. USE AT YOUR OWN RISK.
* IT IS FOR EDUCATIONAL PURPOSES ONLY.
*
* This script will automatically:
* 1. Loop through a list of all 159 counties in Georgia.
* 2. Skip any counties that have already been scraped and saved.
* 3. Type the search query into the Google Maps search bar.
* 4. Click the search button.
* 5. Wait for results to load.
* 6. Scrape all business details on the current page.
* 7. Scroll and click the "Next page" button until all pages are scraped.
* 8. Save the collected data for the county into localStorage.
* 9. Move to the next county.
*
* ===================================================================================
*/
(async () => {
// --- Configuration & Data ---
// County names to process; each entry is interpolated into one search query and used as the key for that county's saved results.
const counties = ["Appling", "Atkinson", "Bacon", "Baker", "Baldwin", "Banks", "Barrow", "Bartow", "Ben Hill", "Berrien", "Bibb", "Bleckley", "Brantley", "Brooks", "Bryan", "Bulloch", "Burke", "Butts", "Calhoun", "Camden", "Candler", "Carroll", "Catoosa", "Charlton", "Chatham", "Chattahoochee", "Chattooga", "Cherokee", "Clarke", "Clay", "Clayton", "Clinch", "Cobb", "Coffee", "Colquitt", "Columbia", "Cook", "Coweta", "Crawford", "Crisp", "Dade", "Dawson", "Decatur", "DeKalb", "Dodge", "Dooly", "Dougherty", "Douglas", "Early", "Echols", "Effingham", "Elbert", "Emanuel", "Evans", "Fannin", "Fayette", "Floyd", "Forsyth", "Franklin", "Fulton", "Gilmer", "Glascock", "Glynn", "Gordon", "Grady", "Greene", "Gwinnett", "Habersham", "Hall", "Hancock", "Haralson", "Harris", "Hart", "Heard", "Henry", "Houston", "Irwin", "Jackson", "Jasper", "Jeff Davis", "Jefferson", "Jenkins", "Johnson", "Jones", "Lamar", "Lanier", "Laurens", "Lee", "Liberty", "Lincoln", "Long", "Lowndes", "Lumpkin", "Macon", "Madison", "Marion", "McDuffie", "McIntosh", "Meriwether", "Miller", "Mitchell", "Monroe", "Montgomery", "Morgan", "Murray", "Muscogee", "Newton", "Oconee", "Oglethorpe", "Paulding", "Peach", "Pickens", "Pierce", "Pike", "Polk", "Pulaski", "Putnam", "Quitman", "Rabun", "Randolph", "Richmond", "Rockdale", "Schley", "Screven", "Seminole", "Spalding", "Stephens", "Stewart", "Sumter", "Talbot", "Taliaferro", "Tattnall", "Taylor", "Telfair", "Terrell", "Thomas", "Tift", "Toombs", "Towns", "Treutlen", "Troup", "Turner", "Twiggs", "Union", "Upson", "Walker", "Walton", "Ware", "Warren", "Washington", "Wayne", "Webster", "Wheeler", "White", "Whitfield", "Wilcox", "Wilkes", "Wilkinson", "Worth"];
// localStorage key holding the whole result set as a single JSON string; a county counts as done once it has a truthy entry there.
const LOCAL_STORAGE_KEY = "gaGasStationsDB";
// --- Helper Functions ---
/**
 * Pauses an async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay handed to `setTimeout`, in milliseconds.
 * @returns {Promise<undefined>} Settles (with no value) when the timer fires; never rejects.
 */
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Draws a pseudo-random delay from a closed range.
 *
 * For integer bounds the result is an integer in [min, max]; both ends can occur.
 *
 * @param {number} [min=1000] - Lower bound in milliseconds.
 * @param {number} [max=3000] - Upper bound in milliseconds.
 * @returns {number} The drawn delay in milliseconds.
 */
const randomDelay = (min = 1000, max = 3000) => {
  // "+ 1" widens the span so that `max` itself is reachable after flooring.
  const span = max - min + 1;
  const shifted = Math.random() * span + min;
  return Math.floor(shifted);
};
/**
 * Waits until `document.querySelector(selector)` returns an element.
 *
 * The DOM is probed once immediately and then every 100 ms. The deadline is
 * measured against the wall clock, so slow or throttled timers cannot extend it.
 *
 * @param {string} selector - CSS selector to look up.
 * @param {number} [timeout=20000] - Maximum time to wait, in milliseconds.
 * @returns {Promise<Element>} Resolves with the first matching element.
 * @throws {Error} Rejects with "Timeout waiting for selector: …" when the deadline
 *   passes, or with whatever `querySelector` throws (e.g. for an invalid selector).
 */
const waitFor = (selector, timeout = 20000) =>
  new Promise((resolve, reject) => {
    const interval = 100;
    const deadline = Date.now() + timeout;
    let timer = null;

    const stopPolling = () => {
      if (timer !== null) {
        clearInterval(timer);
      }
    };

    // Runs one probe; returns true once the promise has been settled.
    const poll = () => {
      let el;
      try {
        el = document.querySelector(selector);
      } catch (error) {
        // Without this, a throwing lookup would leave the interval running
        // forever and the promise permanently pending.
        stopPolling();
        reject(error);
        return true;
      }
      if (el) {
        stopPolling();
        resolve(el);
        return true;
      }
      if (Date.now() >= deadline) {
        stopPolling();
        reject(new Error(`Timeout waiting for selector: ${selector}`));
        return true;
      }
      return false;
    };

    // Probe right away so an element that already exists costs no delay.
    if (!poll()) {
      timer = setInterval(poll, interval);
    }
  });
/**
 * Writes `value` into an input-like element and announces the change, for pages
 * whose framework (e.g. React) does not react to a plain `element.value = …`.
 *
 * Setter preference: the `value` setter declared on the element's direct
 * prototype, then a setter declared on the element itself, then plain assignment.
 * Bubbling "input" and "change" events are dispatched afterwards, in that order.
 *
 * @param {object} element - Target element; must provide `dispatchEvent`.
 * @param {*} value - Value to write.
 * @returns {void}
 */
function setNativeValue(element, value) {
  const ownSetter = Object.getOwnPropertyDescriptor(element, "value")?.set;
  const protoSetter = Object.getOwnPropertyDescriptor(Object.getPrototypeOf(element), "value")?.set;
  const chosenSetter = protoSetter || ownSetter;

  if (chosenSetter) {
    chosenSetter.call(element, value);
  } else {
    element.value = value;
  }

  for (const type of ["input", "change"]) {
    element.dispatchEvent(new Event(type, { bubbles: true }));
  }
}
// --- Main Logic ---
let db = JSON.parse(localStorage.getItem(LOCAL_STORAGE_KEY) || "{}");
const countiesToDo = counties.filter((c) => !db[c]);
if (countiesToDo.length === 0) {
console.log("π All counties have already been scraped! Data is in localStorage.");
console.log('To view it, run: JSON.parse(localStorage.getItem("' + LOCAL_STORAGE_KEY + '"))');
console.log('To restart, run: localStorage.removeItem("' + LOCAL_STORAGE_KEY + '")');
return;
}
console.log(`π Starting scraper. ${counties.length} total counties, ${countiesToDo.length} remaining.`);
await sleep(2000);
for (const county of countiesToDo) {
console.log(`====================================================`);
console.log(`π Now processing: ${county} County`);
try {
// 1. Perform Search
const searchInput = await waitFor("input#searchboxinput");
const searchButton = await waitFor("button#searchbox-searchbutton");
const searchQuery = `gas station in ${county} County, Georgia, USA`;
console.log(` - Typing search query: "${searchQuery}"`);
setNativeValue(searchInput, searchQuery);
await sleep(randomDelay(500, 1000));
console.log(` - Clicking search button...`);
searchButton.click();
// 2. Wait for results panel and scrape
const resultsPanelSelector = `div[aria-label*="Results for gas station in ${county} County"]`;
const panel = await waitFor(resultsPanelSelector);
console.log(` - Results panel loaded.`);
await sleep(randomDelay(3000, 5000)); // Wait for results to populate
const stations = [];
let page = 1;
let LastLength = 0;
while (true) {
console.log(` - Scraping page ${page}...`);
// Scroll to bottom to ensure all results on the page are rendered
panel.scrollTop = panel.scrollHeight;
await sleep(randomDelay(2000, 3500));
panel.scrollTop = panel.scrollHeight; // scroll again to be sure
await sleep(1000);
// This selector targets the link that wraps each search result item
const resultItems = document.querySelectorAll('a[href*="https://www.google.com/maps/place/"]');
console.log(` - Found ${resultItems.length} result items.`);
if (LastLength === resultItems.length) {
console.log(` - No new results found.`);
break;
}
LastLength = resultItems.length;
resultItems.forEach((item) => {
// Check if this item is actually a result in our list, not a map pin link
if (!item.closest(resultsPanelSelector)) return;
const detailsContainer = item.nextElementSibling.nextElementSibling; // Details are often in a sibling div
if (!stations.some(s => s.url === item.href)) {
stations.push({ html: detailsContainer.innerHTML, url: item.href });
}
});
const nextButton = document.querySelector('button[aria-label="Next page"]');
if (nextButton && !nextButton.disabled) {
console.log(` - Found ${stations.length} stations so far. Clicking next page...`);
nextButton.click();
page++;
await sleep(randomDelay(4000, 6000)); // Crucial delay for next page to load
} else {
console.log(` - No more pages found.`);
break;
}
}
// 3. Save Data
db[county] = stations;
localStorage.setItem(LOCAL_STORAGE_KEY, JSON.stringify(db));
console.log(`β
SUCCESS: Saved ${stations.length} stations for ${county} County.`);
} catch (error) {
console.error(`β ERROR processing ${county} County:`, error.message);
console.log(` - Skipping this county for now. It will be retried if you run the script again.`);
}
const nextCountyDelay = randomDelay(5000, 10000);
console.log(` - Waiting for ${Math.round(nextCountyDelay / 1000)} seconds before next county...`);
await sleep(nextCountyDelay);
}
console.log("πππ All counties finished! πππ");
console.log("To view the final data, run this in the console:");
console.log(`copy(localStorage.getItem('${LOCAL_STORAGE_KEY}'))`);
console.log('Then paste the data from your clipboard into a text file and save it as "georgia_gas_stations.json".');
})();
|