Create web crawl open data geospatial
Browse files
web crawl open data geospatial
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from bs4 import BeautifulSoup
|
| 3 |
+
import webbrowser
|
| 4 |
+
|
| 5 |
+
def search_geospatial_open_data():
    """Scrape the first Google results page for geospatial open-data sources.

    Returns:
        list[tuple[str, str]]: (title, url) pairs for each parsed result.
            May be empty if Google changes its markup or blocks the request.

    Raises:
        requests.HTTPError: if the search request returns an error status.
        requests.Timeout: if the request exceeds the 10-second timeout.

    NOTE(review): scraping Google search is brittle (class names like
    'yuRUbf' change without notice) and against its ToS — prefer an
    official search API or a curated list of open-data portals.
    """
    search_url = "https://www.google.com/search?q=geospatial+open+data+sources"
    # Desktop browser UA so Google serves the markup the selectors expect.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # timeout prevents the script from hanging forever on a stalled
    # connection; raise_for_status stops us from parsing an error page.
    response = requests.get(search_url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    for result in soup.find_all('div', class_='yuRUbf'):
        # Guard every lookup: the original assumed an <h3 class="r"> that
        # Google's current result blocks do not carry, so .text raised
        # AttributeError (and a missing <a> raised TypeError).
        title_tag = result.find('h3')
        link_tag = result.find('a')
        if title_tag is None or link_tag is None or not link_tag.has_attr('href'):
            continue
        results.append((title_tag.text, link_tag['href']))

    return results
|
| 19 |
+
|
| 20 |
+
def display_results(results):
    """Print a numbered listing of (title, link) pairs to stdout."""
    print("Top Geospatial Open Data Sources:")
    index = 1
    for title, link in results:
        print(f"{index}. {title}")
        print(f"   {link}\n")
        index += 1
|
| 25 |
+
|
| 26 |
+
def open_selected_link(results):
    """Prompt for a result number and open that link in the default browser.

    Args:
        results: list of (title, link) tuples, as produced by
            search_geospatial_open_data().

    Entering 0 exits without opening anything; out-of-range or non-numeric
    input prints an error message instead of crashing.
    """
    raw = input("Enter the number of the source you'd like to open (0 to exit): ")
    try:
        choice = int(raw)
    except ValueError:
        # The original crashed with an unhandled ValueError on any
        # non-numeric input; treat it like an invalid choice instead.
        print("Invalid choice. Please try again.")
        return
    if 0 < choice <= len(results):
        webbrowser.open(results[choice - 1][1])
    elif choice != 0:
        print("Invalid choice. Please try again.")
|
| 32 |
+
|
| 33 |
+
if __name__ == "__main__":
    # Entry point: search, show the numbered results, then offer to open one.
    found = search_geospatial_open_data()
    display_results(found)
    open_selected_link(found)
|