teagardan committed on
Commit
a1aab2c
·
verified ·
1 Parent(s): 7edd9a7

Create web crawl open data geospatial

Browse files
Files changed (1) hide show
  1. web crawl open data geospatial +36 -0
web crawl open data geospatial ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import webbrowser
4
+
5
def search_geospatial_open_data():
    """Search Google for geospatial open data sources and scrape the hits.

    Requests the results page for a fixed query and collects one
    ``(title, link)`` pair from every ``div.yuRUbf`` container that holds
    both an ``<h3>`` heading and an ``<a href=...>`` anchor.

    Returns:
        list[tuple[str, str]]: ``(title, link)`` tuples in page order; empty
        when the page contains no usable result containers.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server replies with an HTTP error status.
    """
    search_url = "https://www.google.com/search?q=geospatial+open+data+sources"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}

    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(search_url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    results = []
    for result in soup.find_all('div', class_='yuRUbf'):
        # find() returns None when nothing matches, so never chain .text or
        # ['href'] onto it directly. Prefer the original 'r'-classed heading
        # and fall back to any <h3> inside the container.
        heading = result.find('h3', class_='r') or result.find('h3')
        anchor = result.find('a', href=True)
        if heading is None or anchor is None:
            # Incomplete container: skip it rather than abort the whole scrape.
            continue
        results.append((heading.text, anchor['href']))

    return results
19
+
20
def display_results(results):
    """Print a numbered listing of the search results to stdout.

    Args:
        results: Iterable of ``(title, link)`` pairs. Numbering starts at 1.
    """
    print("Top Geospatial Open Data Sources:")
    for position, entry in enumerate(results, start=1):
        heading, url = entry
        print(f"{position}. {heading}")
        # The trailing newline leaves a blank line between entries.
        print(f" {url}\n")
25
+
26
def open_selected_link(results):
    """Prompt for a result number and open that result's link in a browser.

    Keeps prompting until the user enters either ``0`` (exit without opening
    anything) or a number between 1 and ``len(results)``. Non-numeric and
    out-of-range input prints an error and asks again instead of crashing.

    Args:
        results: Sequence of ``(title, link)`` pairs, as shown to the user
            with 1-based numbering.
    """
    while True:
        raw = input("Enter the number of the source you'd like to open (0 to exit): ")
        try:
            choice = int(raw)
        except ValueError:
            # Non-numeric input is treated like any other invalid choice.
            print("Invalid choice. Please try again.")
            continue

        if choice == 0:
            return
        if 0 < choice <= len(results):
            # The displayed numbering is 1-based; the list is 0-based.
            webbrowser.open(results[choice - 1][1])
            return
        print("Invalid choice. Please try again.")
32
+
33
if __name__ == "__main__":
    # Script entry point: search, list what was found, then offer to open one.
    results = search_geospatial_open_data()
    display_results(results)
    if results:
        open_selected_link(results)
    else:
        # Nothing was listed, so there is no number the user could pick.
        print("No results found.")