rkihacker committed on
Commit
db4af16
verified
1 Parent(s): ec1cc34

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +133 -58
main.py CHANGED
@@ -2,6 +2,7 @@ import time
2
  import json
3
  import base64
4
  from typing import List, Optional, Dict, Any
 
5
 
6
  import uvicorn
7
  from fastapi import FastAPI, HTTPException, Query, Request, Response
@@ -14,7 +15,10 @@ from bs4 import BeautifulSoup
14
  # --- Pydantic Models for API Responses ---
15
 
16
  class SearchResultMetadata(BaseModel):
17
- """Defines the structure for metadata associated with a search result."""
 
 
 
18
  sitelinks: Optional[List[Dict[str, str]]] = Field(
19
  None, description="A list of sitelinks (title and URL) found under the main result."
20
  )
@@ -23,16 +27,16 @@ class SearchResultMetadata(BaseModel):
23
  )
24
 
25
  class BingSearchResult(BaseModel):
26
- """Represents a single text search result."""
27
  url: str = Field(..., description="The direct URL of the search result.")
28
  title: str = Field(..., description="The title of the search result.")
29
  description: str = Field(..., description="A brief description or snippet of the search result.")
30
  metadata: SearchResultMetadata = Field(
31
- default_factory=SearchResultMetadata, description="Additional metadata scraped for the result."
32
  )
33
 
34
  class BingImageResult(BaseModel):
35
- """Represents a single image search result."""
36
  title: str = Field(..., description="The title or description of the image.")
37
  image_url: str = Field(..., description="The direct URL to the full-size image.")
38
  thumbnail_url: str = Field(..., description="The URL to the thumbnail of the image.")
@@ -40,7 +44,7 @@ class BingImageResult(BaseModel):
40
  source: str = Field(..., description="The source or domain of the image.")
41
 
42
  class BingNewsResult(BaseModel):
43
- """Represents a single news article search result."""
44
  title: str = Field(..., description="The headline of the news article.")
45
  url: str = Field(..., description="The URL to the full news article.")
46
  description: str = Field(..., description="A snippet from the news article.")
@@ -50,7 +54,10 @@ class BingNewsResult(BaseModel):
50
  # --- Custom Middleware for Response Headers ---
51
 
52
  class CustomHeaderMiddleware(BaseHTTPMiddleware):
53
- """Middleware to add custom headers to every API response."""
 
 
 
54
  async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
55
  start_time = time.time()
56
  response = await call_next(request)
@@ -63,17 +70,21 @@ class CustomHeaderMiddleware(BaseHTTPMiddleware):
63
  # --- Bing Search Service ---
64
 
65
  class BingSearch:
66
- """Asynchronous Bing search implementation with advanced web scraping capabilities."""
 
 
 
 
67
 
68
  def __init__(
69
  self,
70
- timeout: int = 10,
71
  proxies: Optional[Dict[str, str]] = None,
72
  lang: str = "en-US",
73
  impersonate: str = "chrome110"
74
  ):
75
  self.timeout = timeout
76
- self.proxies = proxies if proxies else {}
77
  self.lang = lang
78
  self._base_url = "https://www.bing.com"
79
  self.session = AsyncSession(
@@ -81,52 +92,54 @@ class BingSearch:
81
  timeout=self.timeout,
82
  impersonate=impersonate
83
  )
 
84
  self.session.headers.update({
85
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
 
86
  })
87
 
88
  async def _fetch_html(self, url: str) -> str:
 
89
  try:
90
  resp = await self.session.get(url)
91
  resp.raise_for_status()
92
  return resp.text
93
  except Exception as e:
94
- raise HTTPException(status_code=502, detail=f"Failed to fetch Bing search results: {e}")
 
95
 
96
  def _parse_url(self, url: Optional[str]) -> str:
 
97
  if not url:
98
  return ""
99
  try:
100
- # Bing often uses a redirect URL; this attempts to extract the real URL.
101
  parsed_url = urlparse(url)
102
  query_params = parse_qs(parsed_url.query)
103
  if "u" in query_params:
104
- # The real URL is often Base64 encoded in the 'u' parameter.
105
- encoded_url = query_params["u"][0].replace("h=", "").split("&")[0]
106
- # Pad the string for correct Base64 decoding.
107
  decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
108
  return decoded_bytes.decode('utf-8', errors='ignore')
109
- except Exception:
110
- # If parsing fails, return the original URL.
111
  return url
112
  return url
113
 
114
  async def text(
115
- self,
116
- keywords: str,
117
- region: Optional[str] = None,
118
- max_results: int = 10,
119
  ) -> List[BingSearchResult]:
 
120
  if not keywords:
121
  raise ValueError("Search keywords cannot be empty.")
122
 
123
- fetched_results = []
124
- url = f'{self._base_url}/search?q={urlencode({"q": keywords})}&form=QBLH'
125
  if region:
126
  url += f"&setmkt={region}"
127
 
128
  html = await self._fetch_html(url)
129
  soup = BeautifulSoup(html, "html.parser")
 
130
 
131
  for result in soup.select('li.b_algo'):
132
  if len(fetched_results) >= max_results:
@@ -134,7 +147,6 @@ class BingSearch:
134
 
135
  title_tag = result.find('h2')
136
  link_tag = title_tag.find('a') if title_tag else None
137
-
138
  if not link_tag or not link_tag.has_attr('href'):
139
  continue
140
 
@@ -142,38 +154,40 @@ class BingSearch:
142
  title = link_tag.get_text(strip=True)
143
  description = result.find('p').get_text(strip=True) if result.find('p') else ""
144
 
145
- # --- Metadata Extraction ---
146
- sitelinks = []
147
- sitelinks_container = result.select_one('ul.b_vlist')
148
- if sitelinks_container:
149
- for link_item in sitelinks_container.select('li a'):
150
- sitelinks.append({
151
- "title": link_item.get_text(strip=True),
152
- "url": self._parse_url(link_item.get('href'))
153
- })
154
-
155
- displayed_url_tag = result.select_one('cite')
156
- displayed_url = displayed_url_tag.get_text(strip=True) if displayed_url_tag else None
157
-
158
- metadata = SearchResultMetadata(
159
- sitelinks=sitelinks if sitelinks else None,
160
- displayed_url=displayed_url
161
- )
162
 
163
  if url_val and title:
164
  fetched_results.append(
165
  BingSearchResult(url=url_val, title=title, description=description, metadata=metadata)
166
  )
167
-
168
  return fetched_results
169
 
170
- async def images(
171
- self, keywords: str, max_results: int = 10
172
- ) -> List[BingImageResult]:
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  if not keywords:
174
  raise ValueError("Search keywords cannot be empty.")
175
 
176
- url = f"{self._base_url}/images/search?{urlencode({'q': keywords})}"
 
177
  html = await self._fetch_html(url)
178
  soup = BeautifulSoup(html, "html.parser")
179
  results = []
@@ -182,15 +196,12 @@ class BingSearch:
182
  if len(results) >= max_results:
183
  break
184
  try:
185
- meta_json = item.get("m")
186
- if not meta_json:
187
- continue
188
- meta = json.loads(meta_json)
189
- if meta.get("murl"):
190
  results.append(
191
  BingImageResult(
192
  title=meta.get("t", ""),
193
- image_url=meta.get("murl", ""),
194
  thumbnail_url=meta.get("turl", ""),
195
  page_url=meta.get("purl", ""),
196
  source=urlparse(meta.get("purl", "")).netloc
@@ -200,13 +211,45 @@ class BingSearch:
200
  continue
201
  return results
202
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
  # --- FastAPI Application Setup ---
205
 
206
  app = FastAPI(
207
  title="Bing Search API",
208
  description="An advanced, asynchronous FastAPI wrapper to scrape Bing search results, powered by NiansuhAI.",
209
- version="3.0.0",
210
  )
211
 
212
  app.add_middleware(CustomHeaderMiddleware)
@@ -219,27 +262,59 @@ bing_search_service = BingSearch()
219
  async def text_search(
220
  query: str = Query(..., description="The search keywords."),
221
  region: Optional[str] = Query(None, description="The market/region for the search (e.g., 'en-US')."),
222
- max_results: int = Query(10, ge=1, le=50, description="Maximum number of results to return."),
223
  ):
 
 
 
 
224
  try:
225
  return await bing_search_service.text(keywords=query, region=region, max_results=max_results)
226
  except ValueError as e:
227
  raise HTTPException(status_code=400, detail=str(e))
228
  except Exception as e:
229
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  @app.get("/images", response_model=List[BingImageResult], summary="Perform an image search")
232
  async def image_search(
233
  query: str = Query(..., description="The search keywords for images."),
234
- max_results: int = Query(10, ge=1, le=50, description="Maximum number of image results to return."),
235
  ):
 
236
  try:
237
  return await bing_search_service.images(keywords=query, max_results=max_results)
238
  except ValueError as e:
239
  raise HTTPException(status_code=400, detail=str(e))
240
- except Exception as e:
241
- raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
 
244
  if __name__ == "__main__":
 
245
  uvicorn.run(app, host="0.0.0.0", port=8000)
 
2
  import json
3
  import base64
4
  from typing import List, Optional, Dict, Any
5
+ from urllib.parse import urlencode, urlparse, parse_qs
6
 
7
  import uvicorn
8
  from fastapi import FastAPI, HTTPException, Query, Request, Response
 
15
  # --- Pydantic Models for API Responses ---
16
 
17
  class SearchResultMetadata(BaseModel):
18
+ """
19
+ Defines the structure for rich metadata associated with a search result,
20
+ such as sitelinks and the display URL.
21
+ """
22
  sitelinks: Optional[List[Dict[str, str]]] = Field(
23
  None, description="A list of sitelinks (title and URL) found under the main result."
24
  )
 
27
  )
28
 
29
  class BingSearchResult(BaseModel):
30
+ """Represents a single text search result from Bing."""
31
  url: str = Field(..., description="The direct URL of the search result.")
32
  title: str = Field(..., description="The title of the search result.")
33
  description: str = Field(..., description="A brief description or snippet of the search result.")
34
  metadata: SearchResultMetadata = Field(
35
+ default_factory=SearchResultMetadata, description="Additional rich metadata scraped for the result."
36
  )
37
 
38
  class BingImageResult(BaseModel):
39
+ """Represents a single image search result from Bing."""
40
  title: str = Field(..., description="The title or description of the image.")
41
  image_url: str = Field(..., description="The direct URL to the full-size image.")
42
  thumbnail_url: str = Field(..., description="The URL to the thumbnail of the image.")
 
44
  source: str = Field(..., description="The source or domain of the image.")
45
 
46
  class BingNewsResult(BaseModel):
47
+ """Represents a single news article search result from Bing."""
48
  title: str = Field(..., description="The headline of the news article.")
49
  url: str = Field(..., description="The URL to the full news article.")
50
  description: str = Field(..., description="A snippet from the news article.")
 
54
  # --- Custom Middleware for Response Headers ---
55
 
56
  class CustomHeaderMiddleware(BaseHTTPMiddleware):
57
+ """
58
+ This middleware adds custom headers to every API response, including
59
+ the processing time and a 'Powered-By' header.
60
+ """
61
  async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
62
  start_time = time.time()
63
  response = await call_next(request)
 
70
  # --- Bing Search Service ---
71
 
72
  class BingSearch:
73
+ """
74
+ An asynchronous service class for scraping search results from Bing.
75
+ It handles text, image, news, and suggestion searches using curl_cffi
76
+ for efficient, non-blocking HTTP requests.
77
+ """
78
 
79
  def __init__(
80
  self,
81
+ timeout: int = 15,
82
  proxies: Optional[Dict[str, str]] = None,
83
  lang: str = "en-US",
84
  impersonate: str = "chrome110"
85
  ):
86
  self.timeout = timeout
87
+ self.proxies = proxies or {}
88
  self.lang = lang
89
  self._base_url = "https://www.bing.com"
90
  self.session = AsyncSession(
 
92
  timeout=self.timeout,
93
  impersonate=impersonate
94
  )
95
+ # Use a realistic User-Agent to mimic a real browser
96
  self.session.headers.update({
97
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
98
+ "Accept-Language": "en-US,en;q=0.9",
99
  })
100
 
101
  async def _fetch_html(self, url: str) -> str:
102
+ """Asynchronously fetches HTML content from a given URL."""
103
  try:
104
  resp = await self.session.get(url)
105
  resp.raise_for_status()
106
  return resp.text
107
  except Exception as e:
108
+ # Raise an HTTPException that FastAPI can handle gracefully
109
+ raise HTTPException(status_code=502, detail=f"Failed to fetch Bing content: {e}")
110
 
111
  def _parse_url(self, url: Optional[str]) -> str:
112
+ """Decodes Bing's redirect URLs to find the actual destination URL."""
113
  if not url:
114
  return ""
115
  try:
 
116
  parsed_url = urlparse(url)
117
  query_params = parse_qs(parsed_url.query)
118
  if "u" in query_params:
119
+ encoded_url = query_params["u"][0].split("&")[0]
120
+ # Decode the Base64-encoded URL
 
121
  decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
122
  return decoded_bytes.decode('utf-8', errors='ignore')
123
+ except (KeyError, IndexError, Exception):
124
+ # Fallback to the original URL if parsing fails
125
  return url
126
  return url
127
 
128
  async def text(
129
+ self, keywords: str, region: Optional[str], max_results: int
 
 
 
130
  ) -> List[BingSearchResult]:
131
+ """Performs a text search and scrapes the results page."""
132
  if not keywords:
133
  raise ValueError("Search keywords cannot be empty.")
134
 
135
+ params = {"q": keywords, "form": "QBLH"}
136
+ url = f'{self._base_url}/search?{urlencode(params)}'
137
  if region:
138
  url += f"&setmkt={region}"
139
 
140
  html = await self._fetch_html(url)
141
  soup = BeautifulSoup(html, "html.parser")
142
+ fetched_results = []
143
 
144
  for result in soup.select('li.b_algo'):
145
  if len(fetched_results) >= max_results:
 
147
 
148
  title_tag = result.find('h2')
149
  link_tag = title_tag.find('a') if title_tag else None
 
150
  if not link_tag or not link_tag.has_attr('href'):
151
  continue
152
 
 
154
  title = link_tag.get_text(strip=True)
155
  description = result.find('p').get_text(strip=True) if result.find('p') else ""
156
 
157
+ sitelinks = [
158
+ {"title": a.get_text(strip=True), "url": self._parse_url(a.get('href'))}
159
+ for a in result.select('ul.b_vlist li a')
160
+ ]
161
+ displayed_url = result.cite.get_text(strip=True) if result.cite else None
162
+ metadata = SearchResultMetadata(sitelinks=sitelinks or None, displayed_url=displayed_url)
 
 
 
 
 
 
 
 
 
 
 
163
 
164
  if url_val and title:
165
  fetched_results.append(
166
  BingSearchResult(url=url_val, title=title, description=description, metadata=metadata)
167
  )
 
168
  return fetched_results
169
 
170
+ async def suggestions(self, query: str, region: Optional[str]) -> List[str]:
171
+ """Fetches auto-complete suggestions for a given query."""
172
+ if not query:
173
+ raise ValueError("Search query cannot be empty.")
174
+ params = {"query": query, "mkt": region or "en-US"}
175
+ url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
176
+ try:
177
+ resp = await self.session.get(url)
178
+ resp.raise_for_status()
179
+ data = resp.json()
180
+ return data[1] if isinstance(data, list) and len(data) > 1 else []
181
+ except Exception as e:
182
+ raise HTTPException(status_code=502, detail=f"Failed to fetch suggestions: {e}")
183
+
184
+ async def images(self, keywords: str, max_results: int) -> List[BingImageResult]:
185
+ """Performs an image search and scrapes the results."""
186
  if not keywords:
187
  raise ValueError("Search keywords cannot be empty.")
188
 
189
+ params = {"q": keywords, "count": max_results}
190
+ url = f"{self._base_url}/images/search?{urlencode(params)}"
191
  html = await self._fetch_html(url)
192
  soup = BeautifulSoup(html, "html.parser")
193
  results = []
 
196
  if len(results) >= max_results:
197
  break
198
  try:
199
+ meta = json.loads(item["m"])
200
+ if "murl" in meta:
 
 
 
201
  results.append(
202
  BingImageResult(
203
  title=meta.get("t", ""),
204
+ image_url=meta["murl"],
205
  thumbnail_url=meta.get("turl", ""),
206
  page_url=meta.get("purl", ""),
207
  source=urlparse(meta.get("purl", "")).netloc
 
211
  continue
212
  return results
213
 
214
+ async def news(self, keywords: str, region: Optional[str], max_results: int) -> List[BingNewsResult]:
215
+ """Performs a news search and scrapes the results."""
216
+ if not keywords:
217
+ raise ValueError("Search keywords cannot be empty.")
218
+
219
+ params = {"q": keywords, "form": "QBNH"}
220
+ if region:
221
+ params["mkt"] = region
222
+
223
+ url = f"{self._base_url}/news/search?{urlencode(params)}"
224
+ html = await self._fetch_html(url)
225
+ soup = BeautifulSoup(html, "html.parser")
226
+ results = []
227
+
228
+ for item in soup.select("div.news-card"):
229
+ if len(results) >= max_results:
230
+ break
231
+ a_tag = item.find("a", class_="title")
232
+ snippet_tag = item.find("div", class_="snippet")
233
+ source_tag = item.find("div", class_="source")
234
+
235
+ if a_tag and a_tag.has_attr('href'):
236
+ results.append(
237
+ BingNewsResult(
238
+ title=a_tag.get_text(strip=True),
239
+ url=a_tag['href'],
240
+ description=snippet_tag.get_text(strip=True) if snippet_tag else "",
241
+ source=source_tag.get_text(strip=True).split('·')[0].strip() if source_tag else "",
242
+ )
243
+ )
244
+ return results
245
+
246
 
247
  # --- FastAPI Application Setup ---
248
 
249
  app = FastAPI(
250
  title="Bing Search API",
251
  description="An advanced, asynchronous FastAPI wrapper to scrape Bing search results, powered by NiansuhAI.",
252
+ version="3.1.0",
253
  )
254
 
255
  app.add_middleware(CustomHeaderMiddleware)
 
262
  async def text_search(
263
  query: str = Query(..., description="The search keywords."),
264
  region: Optional[str] = Query(None, description="The market/region for the search (e.g., 'en-US')."),
265
+ max_results: int = Query(10, ge=1, le=30, description="Maximum number of results to return."),
266
  ):
267
+ """
268
+ Performs a text search on Bing and returns a list of results,
269
+ each enriched with metadata like sitelinks.
270
+ """
271
  try:
272
  return await bing_search_service.text(keywords=query, region=region, max_results=max_results)
273
  except ValueError as e:
274
  raise HTTPException(status_code=400, detail=str(e))
275
  except Exception as e:
276
+ # Catch-all for any other unexpected errors
277
+ raise HTTPException(status_code=500, detail=f"An unexpected internal error occurred: {e}")
278
+
279
+ @app.get("/suggestions", response_model=List[str], summary="Get real-time search suggestions")
280
+ async def get_suggestions(
281
+ query: str = Query(..., description="The partial search query for which to fetch suggestions."),
282
+ region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
283
+ ):
284
+ """Fetches real-time search suggestions from Bing's autocomplete service."""
285
+ try:
286
+ return await bing_search_service.suggestions(query=query, region=region)
287
+ except ValueError as e:
288
+ raise HTTPException(status_code=400, detail=str(e))
289
 
290
  @app.get("/images", response_model=List[BingImageResult], summary="Perform an image search")
291
  async def image_search(
292
  query: str = Query(..., description="The search keywords for images."),
293
+ max_results: int = Query(20, ge=1, le=100, description="Maximum number of image results to return."),
294
  ):
295
+ """Performs an image search on Bing and returns a list of image results."""
296
  try:
297
  return await bing_search_service.images(keywords=query, max_results=max_results)
298
  except ValueError as e:
299
  raise HTTPException(status_code=400, detail=str(e))
300
+
301
+ @app.get("/news", response_model=List[BingNewsResult], summary="Perform a news search")
302
+ async def news_search(
303
+ query: str = Query(..., description="The search keywords for news articles."),
304
+ region: Optional[str] = Query(None, description="The region for the news search (e.g., 'en-US')."),
305
+ max_results: int = Query(15, ge=1, le=50, description="Maximum number of news results to return."),
306
+ ):
307
+ """Performs a news search on Bing and returns a list of recent articles."""
308
+ try:
309
+ return await bing_search_service.news(keywords=query, region=region, max_results=max_results)
310
+ except ValueError as e:
311
+ raise HTTPException(status_code=400, detail=str(e))
312
+
313
+ @app.get("/", include_in_schema=False)
314
+ async def root():
315
+ return {"message": "Bing Search API is running. Visit /docs for documentation."}
316
 
317
 
318
  if __name__ == "__main__":
319
+ # Standard entry point to run the FastAPI application using Uvicorn
320
  uvicorn.run(app, host="0.0.0.0", port=8000)