AamirMalik committed on
Commit
daed239
·
verified ·
1 Parent(s): 4c76280

Update scraping_utils.py

Browse files
Files changed (1) hide show
  1. scraping_utils.py +19 -46
scraping_utils.py CHANGED
@@ -1,54 +1,27 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
 
4
def scrape_farnell(component_name):
    """Search uk.farnell.com for a component and scrape the result list.

    Args:
        component_name: Free-text component name/part number to search for.

    Returns:
        A list of dicts with keys "name", "description", and
        "datasheet_link"; each value falls back to a "No ... available"
        placeholder when the corresponding element is missing.
        Returns [] when no product entries are found.

    Raises:
        Exception: if the HTTP response status is not 200.
    """
    # quote_plus properly URL-encodes the search term (spaces, '&', '#', '+', ...);
    # bare f-string interpolation would produce a broken query for such inputs.
    from urllib.parse import quote_plus

    url = f"https://uk.farnell.com/search?st={quote_plus(component_name)}"
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from Farnell: {response.status_code}")

    soup = BeautifulSoup(response.text, "lxml")
    components = []

    # NOTE(review): the CSS class names below are guesses at Farnell's markup
    # and will silently return [] if the site's HTML structure changes.
    products = soup.find_all("div", class_="product")
    if not products:
        return []

    for item in products:
        name = item.find("h3")
        description = item.find("p", class_="description")
        # `string=` is the modern spelling of the deprecated `text=` keyword.
        datasheet_link = item.find("a", string="Datasheet")

        components.append({
            "name": name.text.strip() if name else "No name available",
            "description": description.text.strip() if description else "No description available",
            "datasheet_link": datasheet_link["href"] if datasheet_link else "No datasheet available"
        })

    return components
30
 
31
def scrape_digikey(component_name):
    """Search digikey.com for a component and scrape the result table.

    Args:
        component_name: Free-text component name/part number to search for.

    Returns:
        A list of dicts with keys "name", "description", and
        "datasheet_link"; each value falls back to a "No ... available"
        placeholder when the corresponding element is missing.

    Raises:
        Exception: if the HTTP response status is not 200.
    """
    # quote_plus properly URL-encodes the search term; bare interpolation
    # would break on '&', '#', '+', etc.
    from urllib.parse import quote_plus

    url = f"https://www.digikey.com/en/products/result?keywords={quote_plus(component_name)}"
    # timeout added for consistency with scrape_farnell — without it a stalled
    # connection would hang this call indefinitely.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data from Digi-Key: {response.status_code}")

    soup = BeautifulSoup(response.text, "lxml")
    components = []

    # NOTE(review): these class names assume a server-rendered result table;
    # Digi-Key results are largely JS-rendered, so this may find nothing.
    for item in soup.find_all("tr", class_="product-row"):
        name = item.find("td", class_="digikey-part-number")
        name = name.text.strip() if name else "No name available"
        description = item.find("td", class_="description")
        description = description.text.strip() if description else "No description available"
        # `string=` is the modern spelling of the deprecated `text=` keyword.
        datasheet_link = item.find("a", string="Datasheet")
        datasheet_link = datasheet_link["href"] if datasheet_link else "No datasheet available"

        components.append({
            "name": name,
            "description": description,
            "datasheet_link": datasheet_link
        })

    return components
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
 
4
def search_web(query):
    """
    Perform a Google search for the query and return a list of results.
    Each result includes the title, link, and description.

    Args:
        query: Free-text search query; "electronics component" is appended
            to bias results toward component pages.

    Returns:
        A list of dicts with keys "title", "link", and "description";
        each value falls back to a "No ... available" placeholder when
        the corresponding element is missing.

    Raises:
        Exception: if the HTTP response status is not 200 (Google
        frequently returns 429/302 for automated clients).
    """
    # quote_plus is real URL encoding: unlike query.replace(' ', '+') it also
    # handles '&', '#', '%', '+' and other reserved characters in the query.
    from urllib.parse import quote_plus

    url = f"https://www.google.com/search?q={quote_plus(query)}+electronics+component"
    # A browser-like User-Agent; without it Google serves a stripped page.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch search results: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    # NOTE(review): "tF2Cxc" / "aCOpRe" are Google's obfuscated result-card
    # classes; they change without notice, in which case this yields [].
    for g in soup.find_all("div", class_="tF2Cxc"):
        # Find each element once instead of twice (original re-ran every
        # g.find(...) inside the conditional expression).
        title_el = g.find("h3")
        link_el = g.find("a")
        desc_el = g.find("span", class_="aCOpRe")
        results.append({
            "title": title_el.text if title_el else "No title available",
            "link": link_el["href"] if link_el else "No link available",
            "description": desc_el.text if desc_el else "No description available"
        })

    return results