import json
import os
from typing import List, Dict, Optional
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv

# Load variables from a local .env file so os.getenv("SERPER_API_KEY") can see them.
load_dotenv()


class WebSearcher:
    """
    Serper.dev API integration for web search functionality.

    Sends queries to the Serper search endpoint, flattens the JSON response
    into a list of uniform result dictionaries, and renders those results as
    a short Markdown summary followed by a source list.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and the endpoint used for every search.

        Args:
            api_key: Serper API key. When omitted or empty, the
                SERPER_API_KEY environment variable is used instead.

        Raises:
            ValueError: If no key is passed and SERPER_API_KEY is unset or empty.
        """
        self.api_key = api_key or os.getenv("SERPER_API_KEY")
        self.base_url = "https://google.serper.dev/search"

        if not self.api_key:
            raise ValueError("Serper API key is required. Please set SERPER_API_KEY in your .env file")

    def search(self, query: str, num_results: int = 5) -> Dict:
        """
        Perform web search using Serper API.

        Args:
            query: Search query
            num_results: Number of results to request

        Returns:
            Dictionary containing the decoded JSON search response

        Raises:
            Exception: If the request fails, the server answers with an HTTP
                error status, or the body is not valid JSON. The original
                error is attached as ``__cause__``.
        """
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json',
        }

        payload = {
            'q': query,
            'num': num_results,
            'page': 1,
        }

        try:
            response = requests.post(
                self.base_url,
                headers=headers,
                data=json.dumps(payload),
                timeout=10,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers older requests releases, where a non-JSON body
            # surfaces as a plain JSON decode error rather than a
            # RequestException subclass.
            raise Exception(f"Web search failed: {str(e)}") from e

    @staticmethod
    def _source_from_link(link) -> str:
        """Return the host part of *link*, or '' when there is none to extract."""
        if not link or not isinstance(link, str):
            return ''
        try:
            return urlparse(link).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. unbalanced IPv6 brackets).
            return ''

    def format_search_results(self, search_response: Dict) -> List[Dict]:
        """
        Format search results into a standardized structure.

        Each returned dictionary has the keys ``rank``, ``title``, ``snippet``,
        ``link``, ``source`` and ``type``. The answer box (if any) comes first,
        then the knowledge graph entry (if any), both with rank 0, followed by
        the organic results ranked from 1 in response order.

        Args:
            search_response: Raw response from Serper API

        Returns:
            List of formatted search results; empty when the response is
            empty or missing.
        """
        if not search_response:
            return []

        featured_results = []

        answer_box = search_response.get('answerBox')
        if answer_box:
            featured_results.append({
                'rank': 0,
                'title': answer_box.get('title', 'Direct Answer'),
                'snippet': answer_box.get('answer', answer_box.get('snippet', '')),
                'link': answer_box.get('link', ''),
                'source': answer_box.get('displayLink', 'Google'),
                'type': 'answer_box',
            })

        knowledge_graph = search_response.get('knowledgeGraph')
        if knowledge_graph:
            featured_results.append({
                'rank': 0,
                'title': knowledge_graph.get('title', 'Knowledge Graph'),
                'snippet': knowledge_graph.get('description', ''),
                'link': knowledge_graph.get('descriptionLink', ''),
                'source': knowledge_graph.get('source', 'Google Knowledge Graph'),
                'type': 'knowledge_graph',
            })

        organic_results = []
        # `or []` also tolerates an explicit null for the "organic" key.
        for rank, result in enumerate(search_response.get('organic') or [], start=1):
            link = result.get('link', '')
            organic_results.append({
                'rank': rank,
                'title': result.get('title', ''),
                'snippet': result.get('snippet', ''),
                'link': link,
                # Fall back to the link's host when the result carries no
                # displayLink, so the entry can still be cited as a source.
                'source': result.get('displayLink') or self._source_from_link(link),
                'type': 'organic',
            })

        return featured_results + organic_results

    def search_and_format(self, query: str, num_results: int = 5) -> List[Dict]:
        """
        Perform search and return formatted results.

        This is a best-effort wrapper: any failure is reported on stdout and
        an empty list is returned instead of raising.

        Args:
            query: Search query
            num_results: Number of results to request

        Returns:
            List of formatted search results, or an empty list on failure
        """
        try:
            search_response = self.search(query, num_results)
            return self.format_search_results(search_response)
        except Exception as e:
            print(f"Error in web search: {str(e)}")
            return []

    def create_search_summary(
        self,
        results: List[Dict],
        max_length: int = 1000,
        max_results: int = 3,
    ) -> str:
        """
        Create a summary from search results.

        Only the first ``max_results`` results are considered. A result is
        rendered as ``**title**: snippet``; results without a snippet, or whose
        rendered entry would push the summary body past ``max_length``, are
        skipped. The trailing "Sources" list is not counted against
        ``max_length``.

        Args:
            results: List of search results
            max_length: Maximum length of the summary body (entries plus the
                blank-line separators between them)
            max_results: How many leading results to consider

        Returns:
            Summary text with sources, or a fixed message when ``results`` is
            empty. May be an empty string when no considered result fits.
        """
        if not results:
            return "No web search results found."

        summary_parts = []
        sources = []
        current_length = 0

        for result in results[:max(max_results, 0)]:
            snippet = result.get('snippet', '')
            if not snippet:
                continue

            entry = f"**{result.get('title', '')}**: {snippet}"
            # Entries are joined with "\n\n", so every entry after the first
            # costs two extra characters.
            separator_length = 2 if summary_parts else 0
            if current_length + separator_length + len(entry) > max_length:
                continue

            summary_parts.append(entry)
            current_length += separator_length + len(entry)

            source = result.get('source', '')
            link = result.get('link', '')
            if source and link:
                sources.append(f"- [{source}]({link})")

        summary = "\n\n".join(summary_parts)

        if sources:
            summary += "\n\n**Sources:**\n" + "\n".join(sources)

        return summary