| | """Util that calls Google Search using the Serper.dev API.""" |
| |
|
| | from typing import Any, Dict, List, Optional |
| |
|
| | import aiohttp |
| | import requests |
| | from langchain_core.pydantic_v1 import BaseModel, root_validator |
| | from langchain_core.utils import get_from_dict_or_env |
| | from typing_extensions import Literal |
| |
|
| | import requests |
| | import json |
| |
|
def check_link_no_redirect(url: str, timeout: float = 0.3) -> bool:
    """Report whether a HEAD request to ``url`` answers 200 without redirecting.

    Args:
        url: Address to probe.
        timeout: Seconds passed to ``requests.head`` as its timeout.
            Defaults to 0.3, the value this helper has always used.

    Returns:
        True only when the response status code is exactly 200. False for
        every other status (redirects are not followed, so a 3xx answer is
        reported as False), on timeout, and on any other ``requests`` error.
    """
    try:
        response = requests.head(url, allow_redirects=False, timeout=timeout)
    except requests.exceptions.RequestException:
        # Timeout derives from RequestException, so this single handler
        # covers timeouts as well as connection and URL errors.
        return False
    return response.status_code == 200
| |
|
| |
|
| |
|
| |
|
class GoogleSerperAPIWrapper(BaseModel):
    """Wrapper around the Serper.dev Google Search API.

    You can create a free API key at https://serper.dev.

    To use, you should have the environment variable ``SERPER_API_KEY``
    set with your API key, or pass `serper_api_key` as a named parameter
    to the constructor.

    Parsed output (``run`` / ``arun``) only keeps results whose URL passes
    ``check_link_no_redirect``.

    Example:
        .. code-block:: python

            from langchain_community.utilities import GoogleSerperAPIWrapper
            google_serper = GoogleSerperAPIWrapper()
    """

    # Sent to the API as ``num`` and also used to cap how many results are parsed.
    k: int = 10
    # Forwarded as the ``gl`` / ``hl`` request parameters
    # (presumably country and language codes -- confirm against Serper docs).
    gl: str = "us"
    hl: str = "en"

    # Selects the endpoint path (https://google.serper.dev/<type>).
    type: Literal["news", "search", "places", "images"] = "search"
    # Maps each search type to the response key that holds its result list.
    result_key_for_type = {
        "news": "news",
        "places": "places",
        "images": "images",
        "search": "organic",
    }

    # Forwarded as the ``tbs`` request parameter; omitted from the request when None.
    tbs: Optional[str] = None
    serper_api_key: Optional[str] = None
    # Optional shared aiohttp session; a temporary one is opened per call when unset.
    aiosession: Optional[aiohttp.ClientSession] = None

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment."""
        # Resolved from the ``serper_api_key`` entry of ``values`` or the
        # ``SERPER_API_KEY`` environment variable via get_from_dict_or_env.
        values["serper_api_key"] = get_from_dict_or_env(
            values, "serper_api_key", "SERPER_API_KEY"
        )
        return values

    def results(self, query: str, **kwargs: Any) -> Dict:
        """Run query through GoogleSearch and return the raw response dict."""
        return self._google_serper_api_results(
            query,
            gl=self.gl,
            hl=self.hl,
            num=self.k,
            tbs=self.tbs,
            search_type=self.type,
            **kwargs,
        )

    def run(self, query: str, **kwargs: Any) -> str:
        """Run query through GoogleSearch and parse result."""
        results = self._google_serper_api_results(
            query,
            gl=self.gl,
            hl=self.hl,
            num=self.k,
            tbs=self.tbs,
            search_type=self.type,
            **kwargs,
        )
        return self._parse_results(results)

    async def aresults(self, query: str, **kwargs: Any) -> Dict:
        """Run query through GoogleSearch asynchronously and return the raw dict."""
        return await self._async_google_serper_search_results(
            query,
            gl=self.gl,
            hl=self.hl,
            num=self.k,
            search_type=self.type,
            tbs=self.tbs,
            **kwargs,
        )

    async def arun(self, query: str, **kwargs: Any) -> str:
        """Run query through GoogleSearch and parse result async.

        NOTE: parsing calls ``check_link_no_redirect`` for each result, which
        issues a blocking ``requests.head``; only the search request itself
        is awaited.
        """
        results = await self._async_google_serper_search_results(
            query,
            gl=self.gl,
            hl=self.hl,
            num=self.k,
            search_type=self.type,
            tbs=self.tbs,
            **kwargs,
        )
        return self._parse_results(results)

    def _parse_snippets(self, results: dict) -> List[str]:
        """Format reachable results of the current type as snippet/URL strings.

        Args:
            results: Raw response dict from the Serper API.

        Returns:
            One ``Snippet: ... / Url: ...`` string per kept result, or a
            single-element list with a "no result" message when none is kept.
        """
        result_key = self.result_key_for_type[self.type]
        snippets = []
        # A response without the expected key is treated as having no
        # results instead of raising KeyError.
        for result in (results.get(result_key) or [])[: self.k]:
            if "snippet" not in result:
                continue
            link = result.get("link")
            # Skip entries with no URL or whose URL does not answer 200 directly.
            if not link or not check_link_no_redirect(link):
                continue
            snippets.append(f"Snippet: {result['snippet']}\nUrl: {link}")

        if not snippets:
            return ["No good Google Search Result was found"]
        return snippets

    def _parse_results(self, results: dict) -> str:
        """Render a raw response as text blocks separated by ``-----`` lines.

        Image searches list title and image URL; every other type is
        delegated to ``_parse_snippets``.
        """
        if self.type != "images":
            return "\n-----\n".join(self._parse_snippets(results))

        all_res = []
        for image in (results.get("images") or [])[: self.k]:
            image_url = image.get("imageUrl")
            # Skip entries with no URL or whose URL does not answer 200 directly.
            if not image_url or not check_link_no_redirect(image_url):
                continue
            title = image.get("title", "")
            all_res.append(f"Title: {title}\nUrl: {image_url}")
        return "\n-----\n".join(all_res)

    def _google_serper_api_results(
        self, search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
        """POST the query to Serper synchronously and return the decoded JSON.

        Args:
            search_term: Sent as the ``q`` parameter.
            search_type: Endpoint path segment appended to the base URL.
            **kwargs: Extra request parameters; entries whose value is None
                are dropped.

        Raises:
            requests.HTTPError: Via ``raise_for_status`` on an error response.
        """
        headers = {
            "X-API-KEY": self.serper_api_key or "",
            "Content-Type": "application/json",
        }
        params = {
            "q": search_term,
            **{key: value for key, value in kwargs.items() if value is not None},
        }
        response = requests.post(
            f"https://google.serper.dev/{search_type}", headers=headers, params=params
        )
        response.raise_for_status()
        return response.json()

    async def _async_google_serper_search_results(
        self, search_term: str, search_type: str = "search", **kwargs: Any
    ) -> dict:
        """POST the query to Serper with aiohttp and return the decoded JSON.

        Uses ``self.aiosession`` when set; otherwise opens a session for this
        one request. Both paths pass ``raise_for_status=True`` so HTTP errors
        surface the same way as in the synchronous call instead of being
        returned as an error payload.

        Args:
            search_term: Sent as the ``q`` parameter.
            search_type: Endpoint path segment appended to the base URL.
            **kwargs: Extra request parameters; entries whose value is None
                are dropped.
        """
        headers = {
            "X-API-KEY": self.serper_api_key or "",
            "Content-Type": "application/json",
        }
        url = f"https://google.serper.dev/{search_type}"
        params = {
            "q": search_term,
            **{key: value for key, value in kwargs.items() if value is not None},
        }

        if not self.aiosession:
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    url, params=params, headers=headers, raise_for_status=True
                ) as response:
                    search_results = await response.json()
        else:
            async with self.aiosession.post(
                url, params=params, headers=headers, raise_for_status=True
            ) as response:
                search_results = await response.json()

        return search_results
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | from youtube_search import YoutubeSearch |
def get_youtube_url(query: str, num_results: int = 4) -> str:
    """Search YouTube for ``query`` and format the hits as text.

    Args:
        query: Search terms handed to ``YoutubeSearch``.
        num_results: Passed to ``YoutubeSearch`` as its second argument and
            used to cap how many videos are included. Defaults to 4, the
            value that was previously hard-coded.

    Returns:
        One "Title / Url" entry per video, with entries separated by a line
        of five dashes; an empty string when the search yields no videos.
    """
    payload = json.loads(YoutubeSearch(query, num_results).to_json())
    videos = payload["videos"][:num_results]
    return "\n-----\n".join(
        f"Title: {video['title']}\nUrl: https://www.youtube.com{video['url_suffix']}"
        for video in videos
    )