Spaces:

Greff3
/

Brave

Running

App Files Community

rkihacker committed on Sep 27

Commit

94fa239

verified ·

1 Parent(s): 462850b

Update main.py

Browse files

Files changed (1) hide show

main.py +68 -48

main.py CHANGED Viewed

@@ -64,15 +64,16 @@ class BingSearch:
             timeout=self.timeout,
             impersonate=impersonate
         )
-        self.session.headers.update(LitAgent().generate_fingerprint())
     def _selectors(self, element):
         selectors = {
-            'url': 'h2 a',
-            'title': 'h2',
-            'text': 'p',
-            'links': 'ol#b_results > li.b_algo',
-            'next': 'div#b_content nav[role="navigation"] a.sb_pagN'
         }
         return selectors[element]
@@ -106,6 +107,7 @@ class BingSearch:
             print(f"Error decoding Base64 string: {e}")
         return resp
     def text(
         self,
         keywords: str,
@@ -116,61 +118,79 @@ class BingSearch:
     ) -> List[BingSearchResult]:
         if not keywords:
             raise ValueError("Search keywords cannot be empty")
-        safe_map = {
-            "on": "Strict",
-            "moderate": "Moderate",
-            "off": "Off"
-        }
-        safe = safe_map.get(safesearch.lower(), "Moderate")
         fetched_results = []
         fetched_links = set()
         def fetch_page(url):
             try:
                 resp = self.session.get(url)
                 resp.raise_for_status()
                 return resp.text
             except Exception as e:
-                if hasattr(e, 'response') and e.response is not None:
-                    raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
-                else:
-                    raise Exception(f"Bing search failed: {str(e)}")
-        url = self._first_page(keywords)['url']
-        urls_to_fetch = [url]
-        while len(fetched_results) < max_results and urls_to_fetch:
-            html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
-            urls_to_fetch = []
-            for html in html_pages:
-                soup = BeautifulSoup(html, "html.parser")
-                selector_links = self._selectors('links')
-                result_blocks = soup.select(selector_links)
-                for result in result_blocks:
-                    link_tag = result.select_one(self._selectors('url'))
-                    if not link_tag:
                         continue
-                    url_val = self._get_url(link_tag)
-                    title_tag = result.select_one(self._selectors('title'))
-                    title = title_tag.get_text(strip=True) if title_tag else ''
-                    desc_tag = result.select_one(self._selectors('text'))
-                    description = desc_tag.get_text(strip=True) if desc_tag else ''
-                    if url_val and title:
-                        if unique and url_val in fetched_links:
-                            continue
-                        fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
-                        fetched_links.add(url_val)
-                        if len(fetched_results) >= max_results:
-                            break
-                if len(fetched_results) >= max_results:
-                    break
-                next_page_info = self._next_page(soup)
-                if next_page_info['url']:
-                    urls_to_fetch.append(next_page_info['url'])
-                sleep(self.sleep_interval)
             next_page_info = self._next_page(soup)
-            url = next_page_info['url']
-            sleep(self.sleep_interval)
         return fetched_results[:max_results]
     def suggestions(self, query: str, region: str = None) -> List[str]:
         if not query:
             raise ValueError("Search query cannot be empty")

             timeout=self.timeout,
             impersonate=impersonate
         )
+        # It's good practice to set a realistic User-Agent
+        self.session.headers.update({
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
+        })
+    # FIX: Updated selectors to be more robust against Bing UI changes.
     def _selectors(self, element):
         selectors = {
+            'links': 'ol#b_results > li',  # More generic selector for any list item in results
+            'next': 'a.sb_pagN' # Selector for the "Next" page button
         }
         return selectors[element]
             print(f"Error decoding Base64 string: {e}")
         return resp
+    # FIX: The entire text parsing logic is updated to handle modern Bing HTML structure.
     def text(
         self,
         keywords: str,
     ) -> List[BingSearchResult]:
         if not keywords:
             raise ValueError("Search keywords cannot be empty")
         fetched_results = []
         fetched_links = set()
         def fetch_page(url):
             try:
                 resp = self.session.get(url)
                 resp.raise_for_status()
                 return resp.text
             except Exception as e:
+                raise Exception(f"Bing search failed: {str(e)}")
+        current_url = self._first_page(keywords)['url']
+        while current_url and len(fetched_results) < max_results:
+            html = fetch_page(current_url)
+            soup = BeautifulSoup(html, "html.parser")
+            # Use the more generic selector for result blocks
+            result_blocks = soup.select(self._selectors('links'))
+            for result in result_blocks:
+                # Find the title and link, which are usually in an <h2> tag
+                title_tag = result.find('h2')
+                if not title_tag:
+                    continue
+                link_tag = title_tag.find('a')
+                if not link_tag or not link_tag.has_attr('href'):
+                    continue
+                url_val = self._get_url(link_tag)
+                title = title_tag.get_text(strip=True)
+                # Find the description, often in a div with class 'b_caption'
+                desc_container = result.find('div', class_='b_caption')
+                description = ''
+                if desc_container:
+                    # Find the paragraph within the caption, or use the whole caption text
+                    desc_p = desc_container.find('p')
+                    if desc_p:
+                        description = desc_p.get_text(strip=True)
+                    else:
+                        description = desc_container.get_text(strip=True)
+                # Fallback if no 'b_caption' is found
+                if not description:
+                    p_tag = result.find('p')
+                    if p_tag:
+                        description = p_tag.get_text(strip=True)
+                if url_val and title:
+                    if unique and url_val in fetched_links:
                         continue
+                    fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
+                    fetched_links.add(url_val)
+                    if len(fetched_results) >= max_results:
+                        break
+            if len(fetched_results) >= max_results:
+                break
+            # Find the next page URL
             next_page_info = self._next_page(soup)
+            current_url = next_page_info['url']
+            if current_url:
+                sleep(self.sleep_interval)
         return fetched_results[:max_results]
     def suggestions(self, query: str, region: str = None) -> List[str]:
         if not query:
             raise ValueError("Search query cannot be empty")