rkihacker committed on
Commit
94fa239
·
verified ·
1 Parent(s): 462850b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +68 -48
main.py CHANGED
@@ -64,15 +64,16 @@ class BingSearch:
64
  timeout=self.timeout,
65
  impersonate=impersonate
66
  )
67
- self.session.headers.update(LitAgent().generate_fingerprint())
 
 
 
68
 
 
69
  def _selectors(self, element):
70
  selectors = {
71
- 'url': 'h2 a',
72
- 'title': 'h2',
73
- 'text': 'p',
74
- 'links': 'ol#b_results > li.b_algo',
75
- 'next': 'div#b_content nav[role="navigation"] a.sb_pagN'
76
  }
77
  return selectors[element]
78
 
@@ -106,6 +107,7 @@ class BingSearch:
106
  print(f"Error decoding Base64 string: {e}")
107
  return resp
108
 
 
109
  def text(
110
  self,
111
  keywords: str,
@@ -116,61 +118,79 @@ class BingSearch:
116
  ) -> List[BingSearchResult]:
117
  if not keywords:
118
  raise ValueError("Search keywords cannot be empty")
119
- safe_map = {
120
- "on": "Strict",
121
- "moderate": "Moderate",
122
- "off": "Off"
123
- }
124
- safe = safe_map.get(safesearch.lower(), "Moderate")
125
  fetched_results = []
126
  fetched_links = set()
 
127
  def fetch_page(url):
128
  try:
129
  resp = self.session.get(url)
130
  resp.raise_for_status()
131
  return resp.text
132
  except Exception as e:
133
- if hasattr(e, 'response') and e.response is not None:
134
- raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
135
- else:
136
- raise Exception(f"Bing search failed: {str(e)}")
137
 
138
- url = self._first_page(keywords)['url']
139
- urls_to_fetch = [url]
140
- while len(fetched_results) < max_results and urls_to_fetch:
141
- html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
142
- urls_to_fetch = []
143
- for html in html_pages:
144
- soup = BeautifulSoup(html, "html.parser")
145
- selector_links = self._selectors('links')
146
- result_blocks = soup.select(selector_links)
147
- for result in result_blocks:
148
- link_tag = result.select_one(self._selectors('url'))
149
- if not link_tag:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  continue
151
- url_val = self._get_url(link_tag)
152
- title_tag = result.select_one(self._selectors('title'))
153
- title = title_tag.get_text(strip=True) if title_tag else ''
154
- desc_tag = result.select_one(self._selectors('text'))
155
- description = desc_tag.get_text(strip=True) if desc_tag else ''
156
- if url_val and title:
157
- if unique and url_val in fetched_links:
158
- continue
159
- fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
160
- fetched_links.add(url_val)
161
- if len(fetched_results) >= max_results:
162
- break
163
- if len(fetched_results) >= max_results:
164
- break
165
- next_page_info = self._next_page(soup)
166
- if next_page_info['url']:
167
- urls_to_fetch.append(next_page_info['url'])
168
- sleep(self.sleep_interval)
169
  next_page_info = self._next_page(soup)
170
- url = next_page_info['url']
171
- sleep(self.sleep_interval)
 
 
172
  return fetched_results[:max_results]
173
 
 
174
  def suggestions(self, query: str, region: str = None) -> List[str]:
175
  if not query:
176
  raise ValueError("Search query cannot be empty")
 
64
  timeout=self.timeout,
65
  impersonate=impersonate
66
  )
67
+ # It's good practice to set a realistic User-Agent
68
+ self.session.headers.update({
69
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
70
+ })
71
 
72
+ # FIX: Updated selectors to be more robust against Bing UI changes.
73
  def _selectors(self, element):
74
  selectors = {
75
+ 'links': 'ol#b_results > li', # More generic selector for any list item in results
76
+ 'next': 'a.sb_pagN' # Selector for the "Next" page button
 
 
 
77
  }
78
  return selectors[element]
79
 
 
107
  print(f"Error decoding Base64 string: {e}")
108
  return resp
109
 
110
+ # FIX: The entire text parsing logic is updated to handle modern Bing HTML structure.
111
  def text(
112
  self,
113
  keywords: str,
 
118
  ) -> List[BingSearchResult]:
119
  if not keywords:
120
  raise ValueError("Search keywords cannot be empty")
121
+
 
 
 
 
 
122
  fetched_results = []
123
  fetched_links = set()
124
+
125
  def fetch_page(url):
126
  try:
127
  resp = self.session.get(url)
128
  resp.raise_for_status()
129
  return resp.text
130
  except Exception as e:
131
+ raise Exception(f"Bing search failed: {str(e)}")
 
 
 
132
 
133
+ current_url = self._first_page(keywords)['url']
134
+
135
+ while current_url and len(fetched_results) < max_results:
136
+ html = fetch_page(current_url)
137
+ soup = BeautifulSoup(html, "html.parser")
138
+
139
+ # Use the more generic selector for result blocks
140
+ result_blocks = soup.select(self._selectors('links'))
141
+
142
+ for result in result_blocks:
143
+ # Find the title and link, which are usually in an <h2> tag
144
+ title_tag = result.find('h2')
145
+ if not title_tag:
146
+ continue
147
+
148
+ link_tag = title_tag.find('a')
149
+ if not link_tag or not link_tag.has_attr('href'):
150
+ continue
151
+
152
+ url_val = self._get_url(link_tag)
153
+ title = title_tag.get_text(strip=True)
154
+
155
+ # Find the description, often in a div with class 'b_caption'
156
+ desc_container = result.find('div', class_='b_caption')
157
+ description = ''
158
+ if desc_container:
159
+ # Find the paragraph within the caption, or use the whole caption text
160
+ desc_p = desc_container.find('p')
161
+ if desc_p:
162
+ description = desc_p.get_text(strip=True)
163
+ else:
164
+ description = desc_container.get_text(strip=True)
165
+
166
+ # Fallback if no 'b_caption' is found
167
+ if not description:
168
+ p_tag = result.find('p')
169
+ if p_tag:
170
+ description = p_tag.get_text(strip=True)
171
+
172
+ if url_val and title:
173
+ if unique and url_val in fetched_links:
174
  continue
175
+
176
+ fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
177
+ fetched_links.add(url_val)
178
+
179
+ if len(fetched_results) >= max_results:
180
+ break
181
+
182
+ if len(fetched_results) >= max_results:
183
+ break
184
+
185
+ # Find the next page URL
 
 
 
 
 
 
 
186
  next_page_info = self._next_page(soup)
187
+ current_url = next_page_info['url']
188
+ if current_url:
189
+ sleep(self.sleep_interval)
190
+
191
  return fetched_results[:max_results]
192
 
193
+
194
  def suggestions(self, query: str, region: str = None) -> List[str]:
195
  if not query:
196
  raise ValueError("Search query cannot be empty")