# aniket47's picture
# Deploy Document Intelligence Chatbot
# 01728c5
import requests
import json
from typing import List, Dict, Optional
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment at import time,
# so the SERPER_API_KEY lookup in WebSearcher.__init__ can find the key.
load_dotenv()
class WebSearcher:
    """
    Serper.dev API integration for web search functionality.

    Sends queries to the Serper search endpoint, flattens the JSON response
    into a list of uniform result dictionaries, and can render those results
    as a short Markdown summary with a source list.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and endpoint used for every search.

        Args:
            api_key: Serper API key. When omitted or empty, the
                SERPER_API_KEY environment variable is used instead.

        Raises:
            ValueError: If no key is passed and SERPER_API_KEY is not set.
        """
        self.api_key = api_key or os.getenv("SERPER_API_KEY")
        self.base_url = "https://google.serper.dev/search"
        if not self.api_key:
            raise ValueError("Serper API key is required. Please set SERPER_API_KEY in your .env file")

    def search(self, query: str, num_results: int = 5) -> Dict:
        """
        Perform web search using Serper API.

        Args:
            query: Search query
            num_results: Number of results to request

        Returns:
            Dictionary containing the decoded JSON response

        Raises:
            Exception: If the request fails, times out (10 seconds), returns
                an HTTP error status, or the body is not valid JSON. The
                underlying error is attached as ``__cause__``.
        """
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json',
        }
        payload = {
            'q': query,
            'num': num_results,
            'page': 1,
        }
        try:
            response = requests.post(
                self.base_url,
                headers=headers,
                data=json.dumps(payload),
                timeout=10,
            )
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # .json() does not raise a RequestException subclass. Chaining
            # with "from e" keeps the network/HTTP cause in the traceback.
            raise Exception(f"Web search failed: {str(e)}") from e

    @staticmethod
    def _source_from_link(link) -> str:
        """Return the host part of *link*, or '' when it cannot be determined."""
        from urllib.parse import urlparse

        if not link or not isinstance(link, str):
            return ''
        try:
            return urlparse(link).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. a broken IPv6 host).
            return ''

    def format_search_results(self, search_response: Dict) -> List[Dict]:
        """
        Format search results into a standardized structure.

        Every entry has the keys ``rank``, ``title``, ``snippet``, ``link``,
        ``source`` and ``type``. The answer box (if any) comes first, then the
        knowledge graph (if any), both with rank 0, followed by the organic
        results ranked from 1 in response order.

        Args:
            search_response: Raw response from Serper API

        Returns:
            List of formatted search results
        """
        # "or []" also covers a response where "organic" is present but null.
        organic_results = []
        for rank, result in enumerate(search_response.get('organic') or [], start=1):
            link = result.get('link', '')
            organic_results.append({
                'rank': rank,
                'title': result.get('title', ''),
                'snippet': result.get('snippet', ''),
                'link': link,
                # Organic entries may not carry "displayLink"; fall back to
                # the link's host so the result still has a citable source.
                'source': result.get('displayLink') or self._source_from_link(link),
                'type': 'organic',
            })

        featured_results = []

        # Answer box gets top priority.
        answer_box = search_response.get('answerBox')
        if answer_box:
            featured_results.append({
                'rank': 0,
                'title': answer_box.get('title', 'Direct Answer'),
                'snippet': answer_box.get('answer', answer_box.get('snippet', '')),
                'link': answer_box.get('link', ''),
                'source': answer_box.get('displayLink', 'Google'),
                'type': 'answer_box',
            })

        # Knowledge graph follows the answer box, or leads when there is none.
        knowledge_graph = search_response.get('knowledgeGraph')
        if knowledge_graph:
            featured_results.append({
                'rank': 0,
                'title': knowledge_graph.get('title', 'Knowledge Graph'),
                'snippet': knowledge_graph.get('description', ''),
                'link': knowledge_graph.get('descriptionLink', ''),
                'source': (
                    knowledge_graph.get('source')
                    or knowledge_graph.get('descriptionSource')
                    or 'Google Knowledge Graph'
                ),
                'type': 'knowledge_graph',
            })

        return featured_results + organic_results

    def search_and_format(self, query: str, num_results: int = 5) -> List[Dict]:
        """
        Perform search and return formatted results.

        This is a best-effort wrapper: any failure is printed and an empty
        list is returned instead of raising.

        Args:
            query: Search query
            num_results: Number of results to return

        Returns:
            List of formatted search results, or [] if the search failed
        """
        try:
            search_response = self.search(query, num_results)
            return self.format_search_results(search_response)
        except Exception as e:
            print(f"Error in web search: {str(e)}")
            return []

    def create_search_summary(
        self,
        results: List[Dict],
        max_length: int = 1000,
        max_results: int = 3,
    ) -> str:
        """
        Create a Markdown summary from search results.

        Only the first ``max_results`` entries are considered. An entry is
        included when it has a snippet that still fits in the remaining
        length budget; entries with both a source and a link are also listed
        under a "Sources" heading.

        Args:
            results: List of search results (as produced by
                format_search_results)
            max_length: Approximate maximum length of the summary text
            max_results: How many leading results to consider

        Returns:
            Summary text with sources, or "No web search results found."
            when there are no results or none of them has a usable snippet
        """
        no_results_message = "No web search results found."
        if not results:
            return no_results_message

        summary_parts = []
        sources = []
        current_length = 0
        for result in results[:max_results]:
            snippet = result.get('snippet') or ''
            title = result.get('title') or ''
            source = result.get('source') or ''
            link = result.get('link') or ''

            if not snippet or current_length + len(snippet) >= max_length:
                continue

            summary_parts.append(f"**{title}**: {snippet}")
            if source and link:
                sources.append(f"- [{source}]({link})")
            # Budget accounting counts the snippet, the title and a small
            # fixed allowance for the Markdown formatting.
            current_length += len(snippet) + len(title) + 4

        if not summary_parts:
            # Avoid handing callers a blank string when nothing was usable.
            return no_results_message

        summary = "\n\n".join(summary_parts)
        if sources:
            summary += "\n\n**Sources:**\n" + "\n".join(sources)
        return summary