Update scraping_utils.py — replaced the old product-page/DigiKey scrapers with a single `search_web` helper (+19 −46 lines).
|
@@ -1,54 +1,27 @@
|
|
| 1 |
import requests
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
|
| 4 |
-
def
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
products = soup.find_all("div", class_="product")
|
| 15 |
-
if not products:
|
| 16 |
-
return []
|
| 17 |
-
|
| 18 |
-
for item in products:
|
| 19 |
-
name = item.find("h3")
|
| 20 |
-
description = item.find("p", class_="description")
|
| 21 |
-
datasheet_link = item.find("a", text="Datasheet")
|
| 22 |
-
|
| 23 |
-
components.append({
|
| 24 |
-
"name": name.text.strip() if name else "No name available",
|
| 25 |
-
"description": description.text.strip() if description else "No description available",
|
| 26 |
-
"datasheet_link": datasheet_link["href"] if datasheet_link else "No datasheet available"
|
| 27 |
-
})
|
| 28 |
-
|
| 29 |
-
return components
|
| 30 |
|
| 31 |
-
def scrape_digikey(component_name):
|
| 32 |
-
url = f"https://www.digikey.com/en/products/result?keywords={component_name}"
|
| 33 |
-
response = requests.get(url)
|
| 34 |
if response.status_code != 200:
|
| 35 |
-
raise Exception(f"Failed to fetch
|
| 36 |
|
| 37 |
-
soup = BeautifulSoup(response.text, "
|
| 38 |
-
|
| 39 |
|
| 40 |
-
for
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
description =
|
| 44 |
-
|
| 45 |
-
datasheet_link = item.find("a", text="Datasheet")
|
| 46 |
-
datasheet_link = datasheet_link["href"] if datasheet_link else "No datasheet available"
|
| 47 |
-
|
| 48 |
-
components.append({
|
| 49 |
-
"name": name,
|
| 50 |
-
"description": description,
|
| 51 |
-
"datasheet_link": datasheet_link
|
| 52 |
-
})
|
| 53 |
|
| 54 |
-
return
|
|
|
|
| 1 |
import requests
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
|
| 4 |
+
def search_web(query):
    """
    Perform a Google search for *query* (biased toward electronics
    components) and return a list of result dicts.

    Parameters
    ----------
    query : str
        Free-text search term; it is URL-encoded before being sent.

    Returns
    -------
    list[dict]
        One dict per organic result with keys ``"title"``, ``"link"`` and
        ``"description"``; any field Google omits falls back to a
        ``"No ... available"`` placeholder string.

    Raises
    ------
    Exception
        If the request does not come back with HTTP 200.
    """
    from urllib.parse import quote_plus

    # quote_plus handles reserved characters ('&', '#', '%', unicode, ...);
    # the previous replace(' ', '+') produced broken URLs for such queries.
    url = f"https://www.google.com/search?q={quote_plus(query)}+electronics+component"
    # A browser-like User-Agent is sent because the default python-requests
    # agent tends to get blocked by Google.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch search results: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    # NOTE(review): "tF2Cxc" / "aCOpRe" are Google's current (obfuscated,
    # fragile) CSS classes for an organic-result container and its snippet;
    # they may change without notice.
    for g in soup.find_all("div", class_="tF2Cxc"):
        title = g.find("h3").text if g.find("h3") else "No title available"
        link = g.find("a")["href"] if g.find("a") else "No link available"
        description = g.find("span", class_="aCOpRe").text if g.find("span", class_="aCOpRe") else "No description available"
        results.append({"title": title, "link": link, "description": description})

    return results