Greff3 committed
Commit 5a6ba72 · verified · 1 Parent(s): 084da71

Update main.py

Files changed (1)
  1. main.py +34 -74
main.py CHANGED
@@ -3,10 +3,11 @@ import base64
 import json
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, Dict, List, Optional
-from urllib.parse import urlparse
+from urllib.parse import parse_qs, urlparse

 from bs4 import BeautifulSoup
-from curl_cffi.aio import AsyncSession  # Using the async version
+# This import will work correctly after running `pip install --upgrade curl_cffi`
+from curl_cffi.aio import AsyncSession
 from fastapi import FastAPI, HTTPException, Query
 from pydantic import BaseModel, Field
 from webscout.litagent import LitAgent
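Note: the added comment claims that upgrading curl_cffi makes `from curl_cffi.aio import AsyncSession` work, but in the curl_cffi releases I'm aware of, AsyncSession is exported from curl_cffi.requests, not curl_cffi.aio. A guarded import is a safer sketch (assuming one of the two paths exists in the installed version):

    # Hedged import: try the path this commit uses, then fall back to the
    # location curl_cffi itself documents.
    try:
        from curl_cffi.aio import AsyncSession
    except ImportError:
        from curl_cffi.requests import AsyncSession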
@@ -15,7 +16,7 @@ from webscout.litagent import LitAgent
 app = FastAPI(
     title="Snapzion Enhanced Search API",
     description="An advanced FastAPI wrapper for Bing Search, featuring AI-powered summarization and metadata enrichment.",
-    version="2.0.0",
+    version="2.0.1",  # Version bump
 )

 # --- Pydantic Models for Clearer Responses ---
@@ -50,13 +51,9 @@ class BingSearch:
     """
     Bing search implementation rewritten for asynchronous performance and enhanced data retrieval.
     """
-    # LitAgent is a singleton to reuse its model
     _lit_agent_instance: Optional[LitAgent] = None
-
-    # Run synchronous LitAgent in a thread pool to not block the event loop
     _executor = ThreadPoolExecutor(max_workers=10)

-
     def __init__(
         self,
         timeout: int = 10,
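Note: get_lit_agent() is called later in this diff but its definition is not part of the change. A lazy singleton accessor consistent with the _lit_agent_instance attribute above could look like this (a sketch, not the repository's actual definition):

    @classmethod
    def get_lit_agent(cls) -> LitAgent:
        # Create the shared LitAgent on first use so its model loads only once.
        if cls._lit_agent_instance is None:
            cls._lit_agent_instance = LitAgent()
        return cls._lit_agent_instance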
@@ -94,7 +91,6 @@ class BingSearch:
         loop = asyncio.get_running_loop()
         agent = self.get_lit_agent()
         try:
-            # Use to_thread to run blocking I/O or CPU-bound function in a separate thread
             summary = await loop.run_in_executor(
                 self._executor, agent.summarize, html_content
             )
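Note: the deleted comment mentioned to_thread. On Python 3.9+ the same off-loading can be written without managing an explicit executor, at the cost of using the default thread pool instead of the class-level one:

    # Equivalent sketch with asyncio.to_thread (Python 3.9+):
    summary = await asyncio.to_thread(agent.summarize, html_content)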
@@ -108,37 +104,29 @@ class BingSearch:
         """Fetches page content, generates summary, and extracts metadata."""
         enhanced_result = EnhancedBingSearchResult(**result.model_dump())
         try:
-            # Set source from URL
             parsed_url = urlparse(result.url)
             enhanced_result.source = parsed_url.netloc

-            # Fetch page content for summarization and favicon
             resp = await self.session.get(result.url, timeout=self.timeout)
             resp.raise_for_status()
             html = resp.text

-            # Generate AI summary
             summary = await self._summarize_content(html)
             enhanced_result.summary = summary

-            # Extract favicon
             soup = BeautifulSoup(html, "html.parser")
             favicon_tag = soup.find("link", rel=lambda r: r and "icon" in r.lower())
             if favicon_tag and favicon_tag.get("href"):
                 favicon_url = favicon_tag["href"]
-                # Handle relative favicon URLs
-                if not favicon_url.startswith(('http://', 'https://')):
-                    favicon_url = f"{parsed_url.scheme}://{parsed_url.netloc}{favicon_url}"
+                if not favicon_url.startswith(('http://', 'https://', '//')):
+                    favicon_url = f"{parsed_url.scheme}://{parsed_url.netloc}{'/' if not favicon_url.startswith('/') else ''}{favicon_url}"
+                elif favicon_url.startswith('//'):
+                    favicon_url = f"{parsed_url.scheme}:{favicon_url}"
                 enhanced_result.favicon = favicon_url
-
         except Exception as e:
             print(f"Failed to enhance URL {result.url}: {e}")
-            # Silently fail enhancement, but still return base data
-
         return enhanced_result


-    # ... (selectors, first_page, next_page, get_url methods remain the same) ...
     def _selectors(self, element):
         selectors = {
             'links': 'ol#b_results > li.b_algo',
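Note: the new branches resolve relative, root-relative, and protocol-relative favicon hrefs by hand. urllib.parse.urljoin covers all of these cases in one call; a sketch that should be behaviorally equivalent:

    from urllib.parse import urljoin

    # Resolves "icon.png", "/icon.png", and "//cdn.example.com/icon.png"
    # against the page URL, yielding an absolute favicon URL in each case.
    favicon_url = urljoin(result.url, favicon_tag["href"])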
@@ -153,34 +141,16 @@ class BingSearch:
     def _next_page(self, soup):
         selector = self._selectors('next')
         next_page_tag = soup.select_one(selector)
-        url = None
         if next_page_tag and next_page_tag.get('href'):
-            url = self._base_url + next_page_tag['href']
-        return {'url': url, 'data': None}
+            return {'url': self._base_url + next_page_tag['href'], 'data': None}
+        return {'url': None, 'data': None}

     def _get_url(self, tag):
-        url = tag.get('href', '')
-        # This part handles Bing's weird tracking URLs
-        try:
-            parsed_url = urlparse(url)
-            if "r" in parsed_url.path:  # Direct links are often in /r/
-                query_params = parse_qs(parsed_url.query)
-                if "u" in query_params:
-                    encoded_url = query_params["u"][0][2:]
-                    decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
-                    return decoded_bytes.decode('utf-8')
-        except Exception:
-            pass
-        return url
+        # A more direct approach that is often sufficient
+        return tag.get('href', '')

-
     async def text(
-        self,
-        keywords: str,
-        region: str = None,
-        safesearch: str = "moderate",
-        max_results: int = 10,
-        enhanced: bool = False
+        self, keywords: str, max_results: int = 10, enhanced: bool = False, **kwargs
     ) -> List[BaseSearchResult | EnhancedBingSearchResult]:
         if not keywords:
             raise ValueError("Search keywords cannot be empty")
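Note: the rewritten _get_url drops the base64 decoding of Bing's tracking redirects, so results may now surface bing.com redirect links instead of final URLs. If that matters, the deleted logic could survive as a standalone helper (hypothetical name resolve_bing_redirect, reconstructed from the removed lines):

    import base64
    from urllib.parse import parse_qs, urlparse

    def resolve_bing_redirect(url: str) -> str:
        # Bing wraps targets as ...?...&u=a1<urlsafe-base64>; decode when present.
        try:
            params = parse_qs(urlparse(url).query)
            if "u" in params:
                encoded = params["u"][0][2:]  # drop the "a1" prefix, as the old code did
                return base64.urlsafe_b64decode(encoded + "===").decode("utf-8")
        except Exception:
            pass
        return url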
@@ -215,14 +185,16 @@ class BingSearch:
                 title = title_tag.get_text(strip=True)

                 desc_container = result.find('div', class_='b_caption')
-                description = desc_container.find('p').get_text(strip=True) if desc_container and desc_container.find('p') else ""
-
+                description = ""
+                if desc_container:
+                    p_tag = desc_container.find('p')
+                    if p_tag:
+                        description = p_tag.get_text(strip=True)
+
                 if url_val and title:
                     if url_val in fetched_links: continue
-
                     fetched_results.append(BaseSearchResult(url=url_val, title=title, description=description))
                     fetched_links.add(url_val)
-
                     if len(fetched_results) >= max_results: break

             if len(fetched_results) >= max_results: break
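Note: the five-line replacement fixes the old ternary's double call to find('p'). A CSS selector keeps it just as defensive in two lines (sketch; select_one returns None when nothing matches):

    p_tag = result.select_one('div.b_caption p')
    description = p_tag.get_text(strip=True) if p_tag else ""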
@@ -235,25 +207,22 @@ class BingSearch:
         results_to_return = fetched_results[:max_results]

         if enhanced and results_to_return:
-            # Concurrently enhance all results
             enhancement_tasks = [self._enhance_result(res) for res in results_to_return]
             return await asyncio.gather(*enhancement_tasks)

         return results_to_return

-    # ... (suggestions, images, news methods converted to async) ...
-    async def suggestions(self, query: str, region: str = None) -> List[str]:
-        if not query:
-            raise ValueError("Search query cannot be empty")
-        # ... logic ...
-        url = f"https://api.bing.com/osjson.aspx?query={query}&mkt={region or 'en-US'}"
+    async def suggestions(self, query: str, **kwargs) -> List[str]:
+        if not query: raise ValueError("Query cannot be empty")
+        region = kwargs.get('region', 'en-US')
+        url = f"https://api.bing.com/osjson.aspx?query={query}&mkt={region}"
         resp = await self.session.get(url)
         resp.raise_for_status()
         data = resp.json()
         return data[1] if isinstance(data, list) and len(data) > 1 else []

     async def images(self, keywords: str, max_results: int = 10, **kwargs) -> List[BingImageResult]:
-        # ... logic converted to async ...
+        if not keywords: raise ValueError("Keywords cannot be empty")
         url = f"{self._base_url}/images/search?q={keywords}&count={max_results}"
         resp = await self.session.get(url)
         resp.raise_for_status()
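Note: suggestions and images interpolate raw user input into the query string, so spaces and characters such as '&' pass through unescaped. urllib.parse.urlencode is the standard remedy; a sketch for the suggestions URL built above:

    from urllib.parse import urlencode

    # Percent-encodes spaces, '&', '=', and non-ASCII input before they reach the URL.
    url = "https://api.bing.com/osjson.aspx?" + urlencode({"query": query, "mkt": region})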
@@ -261,8 +230,9 @@ class BingSearch:
         results = []
         for item in soup.select("a.iusc"):
             try:
-                m = item.get("m")
-                meta = json.loads(m) if m else {}
+                m_data = item.get("m")
+                if not m_data: continue
+                meta = json.loads(m_data)
                 if meta.get("murl"):
                     results.append(BingImageResult(title=meta.get("t", ""), image=meta.get("murl"), thumbnail=meta.get("turl", ""), url=meta.get("purl", ""), source=meta.get("surl", "")))
                 if len(results) >= max_results: break
@@ -270,7 +240,7 @@ class BingSearch:
         return results

     async def news(self, keywords: str, max_results: int = 10, **kwargs) -> List[BingNewsResult]:
-        # ... logic converted to async ...
+        if not keywords: raise ValueError("Keywords cannot be empty")
         url = f"{self._base_url}/news/search?q={keywords}"
         resp = await self.session.get(url)
         resp.raise_for_status()
  resp.raise_for_status()
@@ -278,7 +248,7 @@ class BingSearch:
278
  results = []
279
  for item in soup.select("div.news-card"):
280
  a_tag = item.find("a", class_="title")
281
- if not a_tag: continue
282
  desc_tag = item.find("div", class_="snippet")
283
  source_tag = item.find(attrs={"aria-label": "Publisher"})
284
  results.append(BingNewsResult(title=a_tag.get_text(strip=True), url=a_tag['href'], description=desc_tag.get_text(strip=True) if desc_tag else "", source=source_tag.get_text(strip=True) if source_tag else ""))
@@ -289,12 +259,9 @@ bing = BingSearch()

 # --- FastAPI Endpoints ---

-# Use a union type in response_model to reflect the two possible return types
-@app.get("/search", response_model=List[EnhancedBingSearchResult | BaseSearchResult])
+@app.get("/search", response_model=List[EnhancedBingSearchResult | BaseSearchResult], summary="Perform a standard or enhanced text search")
 async def text_search(
     query: str = Query(..., description="The search keywords."),
-    region: Optional[str] = Query(None, description="The region for the search (e.g., 'en-US')."),
-    safesearch: str = Query("moderate", description="Safe search level ('on', 'moderate', 'off')."),
     max_results: int = Query(10, description="Maximum number of results to return."),
     enhanced: bool = Query(False, description="Enable AI summarization and metadata fetching (slower but more detailed).")
 ):
@@ -305,8 +272,6 @@ async def text_search(
     try:
         results = await bing.text(
             keywords=query,
-            region=region,
-            safesearch=safesearch,
             max_results=max_results,
             enhanced=enhanced
         )
@@ -314,34 +279,30 @@ async def text_search(
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-@app.get("/suggestions", response_model=List[str])
+@app.get("/suggestions", response_model=List[str], summary="Fetch search suggestions")
 async def get_suggestions(
     query: str = Query(..., description="The search query for which to fetch suggestions."),
-    region: Optional[str] = Query(None, description="The region for the suggestions (e.g., 'en-US')."),
 ):
-    """Fetches search suggestions for a given query."""
     try:
-        return await bing.suggestions(query=query, region=region)
+        return await bing.suggestions(query=query)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-@app.get("/images", response_model=List[BingImageResult])
+@app.get("/images", response_model=List[BingImageResult], summary="Search for images")
 async def image_search(
     query: str = Query(..., description="The search keywords for images."),
     max_results: int = Query(10, description="Maximum number of image results to return."),
 ):
-    """Perform an image search on Bing."""
     try:
         return await bing.images(keywords=query, max_results=max_results)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

-@app.get("/news", response_model=List[BingNewsResult])
+@app.get("/news", response_model=List[BingNewsResult], summary="Search for news articles")
 async def news_search(
     query: str = Query(..., description="The search keywords for news."),
     max_results: int = Query(10, description="Maximum number of news results to return."),
 ):
-    """Perform a news search on Bing."""
     try:
         return await bing.news(keywords=query, max_results=max_results)
     except Exception as e:
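Note: with region and safesearch removed end to end, the surviving endpoints take only query, max_results, and enhanced. A quick smoke test against a locally running instance (assuming uvicorn is serving on localhost:8000 and the requests package is installed):

    import requests

    resp = requests.get(
        "http://localhost:8000/search",
        params={"query": "fastapi", "max_results": 3, "enhanced": "false"},
        timeout=30,
    )
    resp.raise_for_status()
    for item in resp.json():
        print(item["title"], "->", item["url"])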
@@ -350,5 +311,4 @@ async def news_search(

 if __name__ == "__main__":
     import uvicorn
-    # Add reload=True for development convenience
     uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
 