Update scraping_utils.py — replaced the old product-page/DigiKey scrapers with a single `search_web` helper (+19 −46 lines).
|
@@ -1,54 +1,27 @@
|
|
| 1 |
import requests
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
|
| 4 |
-
def
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
products = soup.find_all("div", class_="product")
|
| 15 |
-
if not products:
|
| 16 |
-
return []
|
| 17 |
-
|
| 18 |
-
for item in products:
|
| 19 |
-
name = item.find("h3")
|
| 20 |
-
description = item.find("p", class_="description")
|
| 21 |
-
datasheet_link = item.find("a", text="Datasheet")
|
| 22 |
-
|
| 23 |
-
components.append({
|
| 24 |
-
"name": name.text.strip() if name else "No name available",
|
| 25 |
-
"description": description.text.strip() if description else "No description available",
|
| 26 |
-
"datasheet_link": datasheet_link["href"] if datasheet_link else "No datasheet available"
|
| 27 |
-
})
|
| 28 |
-
|
| 29 |
-
return components
|
| 30 |
|
| 31 |
-
def scrape_digikey(component_name):
|
| 32 |
-
url = f"https://www.digikey.com/en/products/result?keywords={component_name}"
|
| 33 |
-
response = requests.get(url)
|
| 34 |
if response.status_code != 200:
|
| 35 |
-
raise Exception(f"Failed to fetch
|
| 36 |
|
| 37 |
-
soup = BeautifulSoup(response.text, "
|
| 38 |
-
|
| 39 |
|
| 40 |
-
for
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
description =
|
| 44 |
-
|
| 45 |
-
datasheet_link = item.find("a", text="Datasheet")
|
| 46 |
-
datasheet_link = datasheet_link["href"] if datasheet_link else "No datasheet available"
|
| 47 |
-
|
| 48 |
-
components.append({
|
| 49 |
-
"name": name,
|
| 50 |
-
"description": description,
|
| 51 |
-
"datasheet_link": datasheet_link
|
| 52 |
-
})
|
| 53 |
|
| 54 |
-
return
|
|
|
|
| 1 |
import requests
|
| 2 |
from bs4 import BeautifulSoup
|
| 3 |
|
| 4 |
+
def search_web(query):
    """
    Perform a Google search for *query* (biased toward electronics
    components) and return a list of result dicts.

    Parameters
    ----------
    query : str
        Free-text search term; it is URL-encoded before being sent.

    Returns
    -------
    list[dict]
        One dict per organic result with keys ``"title"``, ``"link"`` and
        ``"description"``; any field Google omits falls back to a
        ``"No ... available"`` placeholder string.

    Raises
    ------
    Exception
        If the request does not come back with HTTP 200.
    """
    from urllib.parse import quote_plus

    # quote_plus handles reserved characters ('&', '#', '%', unicode, ...);
    # the previous replace(' ', '+') produced broken URLs for such queries.
    url = f"https://www.google.com/search?q={quote_plus(query)}+electronics+component"
    # A browser-like User-Agent is sent because the default python-requests
    # agent tends to get blocked by Google.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36"
    }
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch search results: {response.status_code}")

    soup = BeautifulSoup(response.text, "html.parser")
    results = []

    # NOTE(review): "tF2Cxc" / "aCOpRe" are Google's current (obfuscated,
    # fragile) CSS classes for an organic-result container and its snippet;
    # they may change without notice.
    for g in soup.find_all("div", class_="tF2Cxc"):
        title = g.find("h3").text if g.find("h3") else "No title available"
        link = g.find("a")["href"] if g.find("a") else "No link available"
        description = g.find("span", class_="aCOpRe").text if g.find("span", class_="aCOpRe") else "No description available"
        results.append({"title": title, "link": link, "description": description})

    return results