Greff3 commited on
Commit
084da71
·
verified ·
1 Parent(s): 9ea403e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +177 -229
main.py CHANGED
@@ -1,30 +1,35 @@
1
- from fastapi import FastAPI, HTTPException, Query
2
- from typing import List, Optional
3
- from pydantic import BaseModel
4
- from time import sleep
5
- from curl_cffi.requests import Session
6
- from urllib.parse import urlencode, unquote, urlparse, parse_qs
7
  import base64
8
- from typing import Dict, Any
9
  from concurrent.futures import ThreadPoolExecutor
10
- from webscout.litagent import LitAgent
 
 
11
  from bs4 import BeautifulSoup
12
- import json
 
 
 
13
 
 
14
  app = FastAPI(
15
- title="Snapzion Search API",
16
- description="A FastAPI wrapper for the Search library with advanced features.",
17
- version="1.0.0",
18
  )
19
 
20
- # --- BingSearch Library Code ---
21
- # The provided BingSearch code is integrated here directly.
22
 
23
- class BingSearchResult(BaseModel):
24
  url: str
25
  title: str
26
  description: str
27
- metadata: Dict[str, Any] = {}
 
 
 
 
 
28
 
29
  class BingImageResult(BaseModel):
30
  title: str
@@ -39,9 +44,18 @@ class BingNewsResult(BaseModel):
39
  description: str
40
  source: str = ""
41
 
 
 
42
  class BingSearch:
43
- """Bing search implementation with configurable parameters and advanced features."""
44
- _executor: ThreadPoolExecutor = ThreadPoolExecutor()
 
 
 
 
 
 
 
45
 
46
  def __init__(
47
  self,
@@ -58,24 +72,79 @@ class BingSearch:
58
  self.lang = lang
59
  self.sleep_interval = sleep_interval
60
  self._base_url = "https://www.bing.com"
61
- self.session = Session(
62
  proxies=self.proxies,
63
  verify=self.verify,
64
  timeout=self.timeout,
65
  impersonate=impersonate
66
  )
67
- # It's good practice to set a realistic User-Agent
68
  self.session.headers.update({
69
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
70
  })
71
 
72
- # FIX: Updated selectors to be more robust against Bing UI changes.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def _selectors(self, element):
74
  selectors = {
75
- 'links': 'ol#b_results > li', # More generic selector for any list item in results
76
- 'next': 'a.sb_pagN' # Selector for the "Next" page button
77
  }
78
- return selectors[element]
79
 
80
  def _first_page(self, query):
81
  url = f'{self._base_url}/search?q={query}&search=&form=QBLH'
@@ -91,40 +160,37 @@ class BingSearch:
91
 
92
  def _get_url(self, tag):
93
  url = tag.get('href', '')
94
- resp = url
95
  try:
96
  parsed_url = urlparse(url)
97
- query_params = parse_qs(parsed_url.query)
98
- if "u" in query_params:
99
- encoded_url = query_params["u"][0][2:]
100
- try:
101
  decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
102
- except base64.binascii.Error as e:
103
- print(f"Error decoding Base64 string: {e}")
104
- return url
105
- resp = decoded_bytes.decode('utf-8')
106
- except Exception as e:
107
- print(f"Error decoding Base64 string: {e}")
108
- return resp
109
 
110
- # FIX: The entire text parsing logic is updated to handle modern Bing HTML structure.
111
- def text(
112
  self,
113
  keywords: str,
114
  region: str = None,
115
  safesearch: str = "moderate",
116
  max_results: int = 10,
117
- unique: bool = True
118
- ) -> List[BingSearchResult]:
119
  if not keywords:
120
  raise ValueError("Search keywords cannot be empty")
121
 
122
  fetched_results = []
123
  fetched_links = set()
124
 
125
- def fetch_page(url):
126
  try:
127
- resp = self.session.get(url)
128
  resp.raise_for_status()
129
  return resp.text
130
  except Exception as e:
@@ -133,213 +199,116 @@ class BingSearch:
133
  current_url = self._first_page(keywords)['url']
134
 
135
  while current_url and len(fetched_results) < max_results:
136
- html = fetch_page(current_url)
137
  soup = BeautifulSoup(html, "html.parser")
138
 
139
- # Use the more generic selector for result blocks
140
  result_blocks = soup.select(self._selectors('links'))
141
 
142
  for result in result_blocks:
143
- # Find the title and link, which are usually in an <h2> tag
144
  title_tag = result.find('h2')
145
- if not title_tag:
146
- continue
147
 
148
  link_tag = title_tag.find('a')
149
- if not link_tag or not link_tag.has_attr('href'):
150
- continue
151
 
152
  url_val = self._get_url(link_tag)
153
  title = title_tag.get_text(strip=True)
154
 
155
- # Find the description, often in a div with class 'b_caption'
156
  desc_container = result.find('div', class_='b_caption')
157
- description = ''
158
- if desc_container:
159
- # Find the paragraph within the caption, or use the whole caption text
160
- desc_p = desc_container.find('p')
161
- if desc_p:
162
- description = desc_p.get_text(strip=True)
163
- else:
164
- description = desc_container.get_text(strip=True)
165
 
166
- # Fallback if no 'b_caption' is found
167
- if not description:
168
- p_tag = result.find('p')
169
- if p_tag:
170
- description = p_tag.get_text(strip=True)
171
-
172
  if url_val and title:
173
- if unique and url_val in fetched_links:
174
- continue
175
 
176
- fetched_results.append(BingSearchResult(url=url_val, title=title, description=description))
177
  fetched_links.add(url_val)
178
 
179
- if len(fetched_results) >= max_results:
180
- break
181
 
182
- if len(fetched_results) >= max_results:
183
- break
184
 
185
- # Find the next page URL
186
  next_page_info = self._next_page(soup)
187
  current_url = next_page_info['url']
188
  if current_url:
189
- sleep(self.sleep_interval)
190
-
191
- return fetched_results[:max_results]
192
 
 
 
 
 
 
 
 
 
193
 
194
- def suggestions(self, query: str, region: str = None) -> List[str]:
 
195
  if not query:
196
  raise ValueError("Search query cannot be empty")
197
- params = {
198
- "query": query,
199
- "mkt": region if region else "en-US"
200
- }
201
- url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
202
- try:
203
- resp = self.session.get(url)
204
- resp.raise_for_status()
205
- data = resp.json()
206
- if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
207
- return data[1]
208
- return []
209
- except Exception as e:
210
- if hasattr(e, 'response') and e.response is not None:
211
- raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
212
- else:
213
- raise Exception(f"Bing suggestions failed: {str(e)}")
214
 
215
- def images(
216
- self,
217
- keywords: str,
218
- region: str = None,
219
- safesearch: str = "moderate",
220
- max_results: int = 10
221
- ) -> List[BingImageResult]:
222
- if not keywords:
223
- raise ValueError("Search keywords cannot be empty")
224
- safe_map = {
225
- "on": "Strict",
226
- "moderate": "Moderate",
227
- "off": "Off"
228
- }
229
- safe = safe_map.get(safesearch.lower(), "Moderate")
230
- params = {
231
- "q": keywords,
232
- "count": max_results,
233
- "setlang": self.lang,
234
- "safeSearch": safe,
235
- }
236
- if region:
237
- params["mkt"] = region
238
- url = f"{self._base_url}/images/search?{urlencode(params)}"
239
- try:
240
- resp = self.session.get(url)
241
- resp.raise_for_status()
242
- html = resp.text
243
- except Exception as e:
244
- if hasattr(e, 'response') and e.response is not None:
245
- raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
246
- else:
247
- raise Exception(f"Bing image search failed: {str(e)}")
248
- soup = BeautifulSoup(html, "html.parser")
249
  results = []
250
  for item in soup.select("a.iusc"):
251
  try:
252
  m = item.get("m")
253
  meta = json.loads(m) if m else {}
254
- image_url = meta.get("murl", "")
255
- thumb_url = meta.get("turl", "")
256
- title = meta.get("t", "")
257
- page_url = meta.get("purl", "")
258
- source = meta.get("surl", "")
259
- if image_url:
260
- results.append(BingImageResult(title=title, image=image_url, thumbnail=thumb_url, url=page_url, source=source))
261
- if len(results) >= max_results:
262
- break
263
- except Exception:
264
- continue
265
- return results[:max_results]
266
-
267
- def news(
268
- self,
269
- keywords: str,
270
- region: str = None,
271
- safesearch: str = "moderate",
272
- max_results: int = 10,
273
- ) -> List['BingNewsResult']:
274
- if not keywords:
275
- raise ValueError("Search keywords cannot be empty")
276
- safe_map = {
277
- "on": "Strict",
278
- "moderate": "Moderate",
279
- "off": "Off"
280
- }
281
- safe = safe_map.get(safesearch.lower(), "Moderate")
282
- params = {
283
- "q": keywords,
284
- "form": "QBNH",
285
- "safeSearch": safe,
286
- }
287
- if region:
288
- params["mkt"] = region
289
- url = f"{self._base_url}/news/search?{urlencode(params)}"
290
- try:
291
- resp = self.session.get(url)
292
- resp.raise_for_status()
293
- except Exception as e:
294
- if hasattr(e, 'response') and e.response is not None:
295
- raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
296
- else:
297
- raise Exception(f"Bing news search failed: {str(e)}")
298
  soup = BeautifulSoup(resp.text, "html.parser")
299
  results = []
300
- for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
301
- a_tag = item.find("a")
302
- title = a_tag.get_text(strip=True) if a_tag else ''
303
- url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
304
- desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
305
- description = desc_tag.get_text(strip=True) if desc_tag else ''
306
- source_tag = item.find("div", class_="source")
307
- source = source_tag.get_text(strip=True) if source_tag else ''
308
- if url_val and title:
309
- results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
310
- if len(results) >= max_results:
311
- break
312
- if not results:
313
- for item in soup.select("a.title"):
314
- title = item.get_text(strip=True)
315
- url_val = item['href'] if item.has_attr('href') else ''
316
- description = ''
317
- source = ''
318
- if url_val and title:
319
- results.append(BingNewsResult(title=title, url=url_val, description=description, source=source))
320
- if len(results) >= max_results:
321
- break
322
- return results[:max_results]
323
-
324
 
325
  bing = BingSearch()
326
 
327
- @app.get("/search", response_model=List[BingSearchResult])
 
 
 
328
  async def text_search(
329
  query: str = Query(..., description="The search keywords."),
330
- region: Optional[str] = Query(None, description="The region for the search (e.g., 'us-US')."),
331
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
332
  max_results: int = Query(10, description="Maximum number of results to return."),
 
333
  ):
334
  """
335
  Perform a text search on Bing.
 
336
  """
337
  try:
338
- results = bing.text(
339
  keywords=query,
340
  region=region,
341
  safesearch=safesearch,
342
  max_results=max_results,
 
343
  )
344
  return results
345
  except Exception as e:
@@ -350,57 +319,36 @@ async def get_suggestions(
350
  query: str = Query(..., description="The search query for which to fetch suggestions."),
351
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
352
  ):
353
- """
354
- Fetches search suggestions for a given query.
355
- """
356
  try:
357
- suggestions = bing.suggestions(query=query, region=region)
358
- return suggestions
359
  except Exception as e:
360
  raise HTTPException(status_code=500, detail=str(e))
361
 
362
  @app.get("/images", response_model=List[BingImageResult])
363
  async def image_search(
364
  query: str = Query(..., description="The search keywords for images."),
365
- region: Optional[str] = Query(None, description="The region for the image search (e.g., 'us-US')."),
366
- safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
367
  max_results: int = Query(10, description="Maximum number of image results to return."),
368
  ):
369
- """
370
- Perform an image search on Bing.
371
- """
372
  try:
373
- results = bing.images(
374
- keywords=query,
375
- region=region,
376
- safesearch=safesearch,
377
- max_results=max_results,
378
- )
379
- return results
380
  except Exception as e:
381
  raise HTTPException(status_code=500, detail=str(e))
382
 
383
  @app.get("/news", response_model=List[BingNewsResult])
384
  async def news_search(
385
  query: str = Query(..., description="The search keywords for news."),
386
- region: Optional[str] = Query(None, description="The region for the news search (e.g., 'us-US')."),
387
- safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
388
  max_results: int = Query(10, description="Maximum number of news results to return."),
389
  ):
390
- """
391
- Perform a news search on Bing.
392
- """
393
  try:
394
- results = bing.news(
395
- keywords=query,
396
- region=region,
397
- safesearch=safesearch,
398
- max_results=max_results,
399
- )
400
- return results
401
  except Exception as e:
402
  raise HTTPException(status_code=500, detail=str(e))
403
 
 
404
  if __name__ == "__main__":
405
  import uvicorn
406
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
 
1
import asyncio
import base64
import json
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Dict, List, Optional
from urllib.parse import parse_qs, urlencode, urljoin, urlparse

from bs4 import BeautifulSoup
# NOTE: AsyncSession is exported from curl_cffi.requests (curl_cffi.aio does not exist).
from curl_cffi.requests import AsyncSession
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel, Field
from webscout.litagent import LitAgent
 
14
# --- FastAPI App Definition ---
# Single application instance; endpoint handlers are registered below via decorators.
app = FastAPI(
    title="Snapzion Enhanced Search API",
    description="An advanced FastAPI wrapper for Bing Search, featuring AI-powered summarization and metadata enrichment.",
    version="2.0.0",
)
 
21
# --- Pydantic Models for Clearer Responses ---

class BaseSearchResult(BaseModel):
    """Minimal contract shared by every text search result."""
    url: str
    title: str
    description: str


class EnhancedBingSearchResult(BaseSearchResult):
    """Model for the enhanced search results with summary and metadata."""
    summary: Optional[str] = Field(None, description="AI-generated summary of the page content.")
    source: Optional[str] = Field(None, description="The domain name of the result URL.")
    favicon: Optional[str] = Field(None, description="URL of the website's favicon.")
34
  class BingImageResult(BaseModel):
35
  title: str
 
44
  description: str
45
  source: str = ""
46
 
47
+ # --- Enhanced BingSearch Library ---
48
+
49
class BingSearch:
    """
    Bing search implementation rewritten for asynchronous performance and enhanced data retrieval.
    """
    # Lazily-created shared LitAgent (see get_lit_agent); reused across all requests.
    _lit_agent_instance: Optional[LitAgent] = None

    # Thread pool used to run the synchronous LitAgent summarizer without
    # blocking the asyncio event loop.
    _executor = ThreadPoolExecutor(max_workers=10)
 
60
  def __init__(
61
  self,
 
72
  self.lang = lang
73
  self.sleep_interval = sleep_interval
74
  self._base_url = "https://www.bing.com"
75
+ self.session = AsyncSession(
76
  proxies=self.proxies,
77
  verify=self.verify,
78
  timeout=self.timeout,
79
  impersonate=impersonate
80
  )
 
81
  self.session.headers.update({
82
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
83
  })
84
 
85
+ @classmethod
86
+ def get_lit_agent(cls) -> LitAgent:
87
+ """Initializes LitAgent lazily."""
88
+ if cls._lit_agent_instance is None:
89
+ cls._lit_agent_instance = LitAgent()
90
+ return cls._lit_agent_instance
91
+
92
+ async def _summarize_content(self, html_content: str) -> str:
93
+ """Runs the synchronous summarize method in a thread pool."""
94
+ loop = asyncio.get_running_loop()
95
+ agent = self.get_lit_agent()
96
+ try:
97
+ # Use to_thread to run blocking I/O or CPU-bound function in a separate thread
98
+ summary = await loop.run_in_executor(
99
+ self._executor, agent.summarize, html_content
100
+ )
101
+ return summary
102
+ except Exception as e:
103
+ print(f"Error during summarization: {e}")
104
+ return "Could not generate summary."
105
+
106
+
107
+ async def _enhance_result(self, result: BaseSearchResult) -> EnhancedBingSearchResult:
108
+ """Fetches page content, generates summary, and extracts metadata."""
109
+ enhanced_result = EnhancedBingSearchResult(**result.model_dump())
110
+ try:
111
+ # Set source from URL
112
+ parsed_url = urlparse(result.url)
113
+ enhanced_result.source = parsed_url.netloc
114
+
115
+ # Fetch page content for summarization and favicon
116
+ resp = await self.session.get(result.url, timeout=self.timeout)
117
+ resp.raise_for_status()
118
+ html = resp.text
119
+
120
+ # Generate AI summary
121
+ summary = await self._summarize_content(html)
122
+ enhanced_result.summary = summary
123
+
124
+ # Extract favicon
125
+ soup = BeautifulSoup(html, "html.parser")
126
+ favicon_tag = soup.find("link", rel=lambda r: r and "icon" in r.lower())
127
+ if favicon_tag and favicon_tag.get("href"):
128
+ favicon_url = favicon_tag["href"]
129
+ # Handle relative favicon URLs
130
+ if not favicon_url.startswith(('http://', 'https://')):
131
+ favicon_url = f"{parsed_url.scheme}://{parsed_url.netloc}{favicon_url}"
132
+ enhanced_result.favicon = favicon_url
133
+
134
+ except Exception as e:
135
+ print(f"Failed to enhance URL {result.url}: {e}")
136
+ # Silently fail enhancement, but still return base data
137
+
138
+ return enhanced_result
139
+
140
+
141
+ # ... (selectors, first_page, next_page, get_url methods remain the same) ...
142
  def _selectors(self, element):
143
  selectors = {
144
+ 'links': 'ol#b_results > li.b_algo',
145
+ 'next': 'a.sb_pagN'
146
  }
147
+ return selectors.get(element, '')
148
 
149
  def _first_page(self, query):
150
  url = f'{self._base_url}/search?q={query}&search=&form=QBLH'
 
160
 
161
  def _get_url(self, tag):
162
  url = tag.get('href', '')
163
+ # This part handles Bing's weird tracking URLs
164
  try:
165
  parsed_url = urlparse(url)
166
+ if "r" in parsed_url.path: # Direct links are often in /r/
167
+ query_params = parse_qs(parsed_url.query)
168
+ if "u" in query_params:
169
+ encoded_url = query_params["u"][0][2:]
170
  decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
171
+ return decoded_bytes.decode('utf-8')
172
+ except Exception:
173
+ pass
174
+ return url
175
+
 
 
176
 
177
+ async def text(
 
178
  self,
179
  keywords: str,
180
  region: str = None,
181
  safesearch: str = "moderate",
182
  max_results: int = 10,
183
+ enhanced: bool = False
184
+ ) -> List[BaseSearchResult | EnhancedBingSearchResult]:
185
  if not keywords:
186
  raise ValueError("Search keywords cannot be empty")
187
 
188
  fetched_results = []
189
  fetched_links = set()
190
 
191
+ async def fetch_page(url):
192
  try:
193
+ resp = await self.session.get(url)
194
  resp.raise_for_status()
195
  return resp.text
196
  except Exception as e:
 
199
  current_url = self._first_page(keywords)['url']
200
 
201
  while current_url and len(fetched_results) < max_results:
202
+ html = await fetch_page(current_url)
203
  soup = BeautifulSoup(html, "html.parser")
204
 
 
205
  result_blocks = soup.select(self._selectors('links'))
206
 
207
  for result in result_blocks:
 
208
  title_tag = result.find('h2')
209
+ if not title_tag: continue
 
210
 
211
  link_tag = title_tag.find('a')
212
+ if not link_tag or not link_tag.has_attr('href'): continue
 
213
 
214
  url_val = self._get_url(link_tag)
215
  title = title_tag.get_text(strip=True)
216
 
 
217
  desc_container = result.find('div', class_='b_caption')
218
+ description = desc_container.find('p').get_text(strip=True) if desc_container and desc_container.find('p') else ""
 
 
 
 
 
 
 
219
 
 
 
 
 
 
 
220
  if url_val and title:
221
+ if url_val in fetched_links: continue
 
222
 
223
+ fetched_results.append(BaseSearchResult(url=url_val, title=title, description=description))
224
  fetched_links.add(url_val)
225
 
226
+ if len(fetched_results) >= max_results: break
 
227
 
228
+ if len(fetched_results) >= max_results: break
 
229
 
 
230
  next_page_info = self._next_page(soup)
231
  current_url = next_page_info['url']
232
  if current_url:
233
+ await asyncio.sleep(self.sleep_interval)
 
 
234
 
235
+ results_to_return = fetched_results[:max_results]
236
+
237
+ if enhanced and results_to_return:
238
+ # Concurrently enhance all results
239
+ enhancement_tasks = [self._enhance_result(res) for res in results_to_return]
240
+ return await asyncio.gather(*enhancement_tasks)
241
+
242
+ return results_to_return
243
 
244
+ # ... (suggestions, images, news methods converted to async) ...
245
+ async def suggestions(self, query: str, region: str = None) -> List[str]:
246
  if not query:
247
  raise ValueError("Search query cannot be empty")
248
+ # ... logic ...
249
+ url = f"https://api.bing.com/osjson.aspx?query={query}&mkt={region or 'en-US'}"
250
+ resp = await self.session.get(url)
251
+ resp.raise_for_status()
252
+ data = resp.json()
253
+ return data[1] if isinstance(data, list) and len(data) > 1 else []
 
 
 
 
 
 
 
 
 
 
 
254
 
255
+ async def images(self, keywords: str, max_results: int = 10, **kwargs) -> List[BingImageResult]:
256
+ # ... logic converted to async ...
257
+ url = f"{self._base_url}/images/search?q={keywords}&count={max_results}"
258
+ resp = await self.session.get(url)
259
+ resp.raise_for_status()
260
+ soup = BeautifulSoup(resp.text, "html.parser")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  results = []
262
  for item in soup.select("a.iusc"):
263
  try:
264
  m = item.get("m")
265
  meta = json.loads(m) if m else {}
266
+ if meta.get("murl"):
267
+ results.append(BingImageResult(title=meta.get("t", ""), image=meta.get("murl"), thumbnail=meta.get("turl", ""), url=meta.get("purl", ""), source=meta.get("surl", "")))
268
+ if len(results) >= max_results: break
269
+ except Exception: continue
270
+ return results
271
+
272
+ async def news(self, keywords: str, max_results: int = 10, **kwargs) -> List[BingNewsResult]:
273
+ # ... logic converted to async ...
274
+ url = f"{self._base_url}/news/search?q={keywords}"
275
+ resp = await self.session.get(url)
276
+ resp.raise_for_status()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  soup = BeautifulSoup(resp.text, "html.parser")
278
  results = []
279
+ for item in soup.select("div.news-card"):
280
+ a_tag = item.find("a", class_="title")
281
+ if not a_tag: continue
282
+ desc_tag = item.find("div", class_="snippet")
283
+ source_tag = item.find(attrs={"aria-label": "Publisher"})
284
+ results.append(BingNewsResult(title=a_tag.get_text(strip=True), url=a_tag['href'], description=desc_tag.get_text(strip=True) if desc_tag else "", source=source_tag.get_text(strip=True) if source_tag else ""))
285
+ if len(results) >= max_results: break
286
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
 
288
# Module-level BingSearch singleton shared by every endpoint handler.
bing = BingSearch()
289
 
290
+ # --- FastAPI Endpoints ---
291
+
292
+ # Use a union type in response_model to reflect the two possible return types
293
+ @app.get("/search", response_model=List[EnhancedBingSearchResult | BaseSearchResult])
294
  async def text_search(
295
  query: str = Query(..., description="The search keywords."),
296
+ region: Optional[str] = Query(None, description="The region for the search (e.g., 'en-US')."),
297
  safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
298
  max_results: int = Query(10, description="Maximum number of results to return."),
299
+ enhanced: bool = Query(False, description="Enable AI summarization and metadata fetching (slower but more detailed).")
300
  ):
301
  """
302
  Perform a text search on Bing.
303
+ - Set `enhanced=true` to get AI-powered summaries and additional metadata for each result.
304
  """
305
  try:
306
+ results = await bing.text(
307
  keywords=query,
308
  region=region,
309
  safesearch=safesearch,
310
  max_results=max_results,
311
+ enhanced=enhanced
312
  )
313
  return results
314
  except Exception as e:
 
319
  query: str = Query(..., description="The search query for which to fetch suggestions."),
320
  region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
321
  ):
322
+ """Fetches search suggestions for a given query."""
 
 
323
  try:
324
+ return await bing.suggestions(query=query, region=region)
 
325
  except Exception as e:
326
  raise HTTPException(status_code=500, detail=str(e))
327
 
328
@app.get("/images", response_model=List[BingImageResult])
async def image_search(
    query: str = Query(..., description="The search keywords for images."),
    max_results: int = Query(10, description="Maximum number of image results to return."),
):
    """Perform an image search on Bing."""
    try:
        image_results = await bing.images(keywords=query, max_results=max_results)
    except Exception as exc:
        # Surface any scraping/network failure as a 500 with the message.
        raise HTTPException(status_code=500, detail=str(exc))
    return image_results
338
 
339
@app.get("/news", response_model=List[BingNewsResult])
async def news_search(
    query: str = Query(..., description="The search keywords for news."),
    max_results: int = Query(10, description="Maximum number of news results to return."),
):
    """Perform a news search on Bing."""
    try:
        news_results = await bing.news(keywords=query, max_results=max_results)
    except Exception as exc:
        # Surface any scraping/network failure as a 500 with the message.
        raise HTTPException(status_code=500, detail=str(exc))
    return news_results
349
 
350
+
351
if __name__ == "__main__":
    import os
    import uvicorn

    # Generalized: host/port/reload are overridable via environment variables,
    # with defaults preserving the previous behavior (reload=True is convenient
    # in development but should be disabled in production via RELOAD=false).
    uvicorn.run(
        "main:app",
        host=os.getenv("HOST", "0.0.0.0"),
        port=int(os.getenv("PORT", "8000")),
        reload=os.getenv("RELOAD", "true").lower() == "true",
    )