File size: 2,767 Bytes
6b8d8ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import io
from playwright.sync_api import sync_playwright
import pandas as pd
from geolocation import getcoords


# XPath matching the search-result links: anchors whose href contains a
# Google Maps "place" URL. Used below to hover, count and click the results.
MAIN_TAB_XPATH = '//a[contains(@href, "https://www.google.com/maps/place")]'

# NOTE(review): module-level placeholder. The functions visible in this file
# only use their own parameter/local named `data`, so this global looks
# unused — confirm before removing.
data = None


def get_excel_bytes(data):
    """Serialize *data* into an in-memory Excel workbook and return its bytes.

    Args:
        data: Anything accepted by ``pandas.DataFrame`` (``scrape_maps``
            passes a dict mapping column names to equally long lists).

    Returns:
        bytes: The content of an ``.xlsx`` workbook with a single sheet
        named ``Sheet1`` and no index column.
    """
    df = pd.DataFrame(data)
    output = io.BytesIO()

    # The context manager closes the writer (which is what actually writes
    # the workbook into ``output``) even if ``to_excel`` raises.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Sheet1', index=False)

    return output.getvalue()


def _visible_text(locator):
    """Return the locator's inner text if it is visible, otherwise ""."""
    return locator.inner_text() if locator.is_visible() else ""


def scrape_maps(search_term, city, state, num_items):
    """Scrape Google Maps search results and return them as Excel bytes.

    Opens Google Maps centred on the coordinates that ``getcoords`` returns
    for ``"{city}, {state}"``, searches for ``"{search_term} near me"``,
    scrolls until enough result links are present, then clicks each result
    and reads its title, address, website and phone number.

    Args:
        search_term: Text to search for (``" near me"`` is appended).
        city: City name, combined with ``state`` for the coordinate lookup.
        state: State name, combined with ``city`` for the coordinate lookup.
        num_items: Maximum number of results to scrape. Fewer rows are
            returned when scrolling stops producing new results.

    Returns:
        bytes: Workbook produced by ``get_excel_bytes`` with the columns
        ``Titles``, ``Address``, ``Website`` and ``Phone No.``. Fields that
        are not visible for a result are left as empty strings.

    Raises:
        Errors raised by Playwright (for example a timeout while waiting
        for the first result link) propagate to the caller.
    """
    lat, lon = getcoords(f"{city}, {state}")
    url = f"https://www.google.com/maps/@{lat},{lon},12z?entry=ttu"

    # Give up after this many consecutive scrolls that load nothing new, so
    # a search with fewer than ``num_items`` results cannot loop forever.
    max_stalled_scrolls = 3

    titles = []
    webs = []
    phones = []
    addrs = []

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)

            searchbox = page.locator("#searchboxinput")
            searchbox.click()
            searchbox.fill(f"{search_term} near me")
            page.keyboard.press("Enter")
            page.wait_for_timeout(2000)

            # Put the mouse over the result list so wheel events scroll it.
            page.hover(MAIN_TAB_XPATH)

            results = page.locator(MAIN_TAB_XPATH).all()
            stalled = 0
            while len(results) < num_items and stalled < max_stalled_scrolls:
                previous_count = len(results)

                page.mouse.wheel(0, 1000)
                page.wait_for_timeout(2000)

                results = page.locator(MAIN_TAB_XPATH).all()
                print(len(results))

                if len(results) > previous_count:
                    stalled = 0
                else:
                    stalled += 1

            # A scroll can load more links than requested; keep only the
            # number the caller asked for.
            results = results[:max(num_items, 0)]

            for result in results:
                result.click()
                page.wait_for_timeout(2000)

                response = page.get_by_role("main").last

                title = _visible_text(response.locator("h1").last)
                address = _visible_text(
                    response.locator("[data-item-id='address']")
                    .locator(".fontBodyMedium"))
                phone = _visible_text(
                    response.locator("[data-tooltip='Copy phone number']")
                    .locator(".fontBodyMedium"))

                # The website is read from the link's href, not its text.
                website = ""
                website_link = response.locator("[data-item-id='authority']")
                if website_link.is_visible():
                    website = website_link.get_attribute("href")

                titles.append(title)
                addrs.append(address)
                webs.append(website)
                phones.append(phone)
        finally:
            # Close the browser even when a scraping step raises.
            browser.close()

    data = {
        "Titles": titles,
        "Address": addrs,
        "Website": webs,
        "Phone No.": phones
    }

    return get_excel_bytes(data=data)