# File size: 5,744 Bytes
import json
import os
from typing import Dict, List, Optional
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv
# Load environment variables (via python-dotenv) at import time so that
# SERPER_API_KEY can be picked up by os.getenv() when a WebSearcher is
# created without an explicit api_key.
load_dotenv()
class WebSearcher:
    """
    Serper.dev API integration for web search functionality.

    Wraps the HTTP call to the search endpoint and converts the raw JSON
    response into a flat list of result dictionaries and a short Markdown
    summary.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and the search endpoint URL.

        Args:
            api_key: Serper API key. When omitted (or empty), the
                SERPER_API_KEY environment variable is used instead.

        Raises:
            ValueError: If no key was passed and the variable is unset/empty.
        """
        self.api_key = api_key or os.getenv("SERPER_API_KEY")
        self.base_url = "https://google.serper.dev/search"
        if not self.api_key:
            raise ValueError("Serper API key is required. Please set SERPER_API_KEY in your .env file")

    def search(self, query: str, num_results: int = 5) -> Dict:
        """
        Perform web search using Serper API.

        Args:
            query: Search query
            num_results: Number of results to return

        Returns:
            Dictionary containing search results (the decoded JSON body)

        Raises:
            Exception: If the HTTP request fails or returns an error status.
                The underlying requests exception is attached as the cause.
        """
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json',
        }
        payload = {
            'q': query,
            'num': num_results,
            'page': 1,
        }
        try:
            response = requests.post(
                self.base_url,
                headers=headers,
                data=json.dumps(payload),
                timeout=10,
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Chain the original error so the root cause stays in the traceback.
            raise Exception(f"Web search failed: {str(e)}") from e

    @staticmethod
    def _host_of(link: str) -> str:
        """Return the host part of *link*, or '' when it cannot be determined."""
        if not link:
            return ''
        try:
            return urlparse(link).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
            return ''

    def format_search_results(self, search_response: Dict) -> List[Dict]:
        """
        Format search results into a standardized structure.

        Each entry has the keys 'rank', 'title', 'snippet', 'link', 'source'
        and 'type'. The answer box (if any) comes first, then the knowledge
        graph (if any), both with rank 0, followed by the organic results
        ranked from 1 in response order.

        Args:
            search_response: Raw response from Serper API

        Returns:
            List of formatted search results
        """
        featured = []

        # Answer box gets top priority.
        answer_box = search_response.get('answerBox')
        if answer_box:
            featured.append({
                'rank': 0,
                'title': answer_box.get('title', 'Direct Answer'),
                'snippet': answer_box.get('answer', answer_box.get('snippet', '')),
                'link': answer_box.get('link', ''),
                'source': answer_box.get('displayLink', 'Google'),
                'type': 'answer_box',
            })

        # Knowledge graph follows the answer box (or leads when there is none).
        knowledge_graph = search_response.get('knowledgeGraph')
        if knowledge_graph:
            featured.append({
                'rank': 0,
                'title': knowledge_graph.get('title', 'Knowledge Graph'),
                'snippet': knowledge_graph.get('description', ''),
                'link': knowledge_graph.get('descriptionLink', ''),
                'source': knowledge_graph.get('source', 'Google Knowledge Graph'),
                'type': 'knowledge_graph',
            })

        # `or []` tolerates a response where 'organic' is present but null.
        organic = []
        for position, result in enumerate(search_response.get('organic') or [], start=1):
            link = result.get('link', '')
            organic.append({
                'rank': position,
                'title': result.get('title', ''),
                'snippet': result.get('snippet', ''),
                'link': link,
                # Without a displayLink the source would be empty and the
                # summary would omit the citation; fall back to the link host.
                'source': result.get('displayLink') or self._host_of(link),
                'type': 'organic',
            })

        return featured + organic

    def search_and_format(self, query: str, num_results: int = 5) -> List[Dict]:
        """
        Perform search and return formatted results.

        This is a best-effort helper: any failure is reported on stdout and
        an empty list is returned instead of raising.

        Args:
            query: Search query
            num_results: Number of results to return

        Returns:
            List of formatted search results, or [] on error
        """
        try:
            search_response = self.search(query, num_results)
            return self.format_search_results(search_response)
        except Exception as e:
            print(f"Error in web search: {str(e)}")
            return []

    def create_search_summary(self, results: List[Dict], max_length: int = 1000) -> str:
        """
        Create a summary from search results.

        Only the first three results are considered. A result is included
        when it has a snippet and its rendered entry still fits into the
        budget; results that do not fit are skipped individually.

        Args:
            results: List of search results
            max_length: Maximum length of the summary body (the trailing
                "Sources" list is not counted)

        Returns:
            Summary text with sources
        """
        if not results:
            return "No web search results found."

        separator = "\n\n"
        summary_parts = []
        sources = []
        current_length = 0

        for result in results[:3]:  # Use top 3 results for summary
            snippet = result.get('snippet', '')
            if not snippet:
                continue

            entry = f"**{result.get('title', '')}**: {snippet}"
            # Measure the text that is actually emitted (entry plus the
            # separator joining it to the previous one), so the body can
            # never exceed max_length.
            projected = current_length + len(entry)
            if summary_parts:
                projected += len(separator)
            if projected > max_length:
                continue

            summary_parts.append(entry)
            current_length = projected

            source = result.get('source', '')
            link = result.get('link', '')
            if source and link:
                sources.append(f"- [{source}]({link})")

        # Combine summary parts
        summary = separator.join(summary_parts)
        if sources:
            summary += "\n\n**Sources:**\n" + "\n".join(sources)
        return summary