File size: 5,744 Bytes
01728c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import json
import os
from typing import List, Dict, Optional
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv

# Load environment variables (e.g. from a .env file) at import time, before
# WebSearcher looks up SERPER_API_KEY via os.getenv.
load_dotenv()

class WebSearcher:
    """
    Serper.dev API integration for web search functionality.

    Sends queries to the Serper search endpoint, flattens the raw JSON
    response into a list of uniform result dictionaries, and can render the
    top results as a short Markdown summary with a source list.
    """

    def __init__(self, api_key: Optional[str] = None):
        """
        Store the API key and endpoint used for all searches.

        Args:
            api_key: Serper API key. When omitted or empty, the
                SERPER_API_KEY environment variable is used instead.

        Raises:
            ValueError: If no API key is passed and none is found in the
                environment.
        """
        self.api_key = api_key or os.getenv("SERPER_API_KEY")
        self.base_url = "https://google.serper.dev/search"

        if not self.api_key:
            raise ValueError("Serper API key is required. Please set SERPER_API_KEY in your .env file")

    @staticmethod
    def _source_from_link(link: Optional[str]) -> str:
        """Return the host part of *link*, or '' if it is empty or unparsable."""
        if not link:
            return ''
        try:
            return urlparse(link).netloc
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. broken IPv6 hosts).
            return ''

    def search(self, query: str, num_results: int = 5) -> Dict:
        """
        Perform web search using Serper API

        Args:
            query: Search query
            num_results: Number of results to request

        Returns:
            Dictionary containing the decoded JSON response

        Raises:
            Exception: If the request fails or the server answers with an
                error status (any requests RequestException). The original
                error is attached as the exception's cause.
        """
        headers = {
            'X-API-KEY': self.api_key,
            'Content-Type': 'application/json'
        }

        payload = {
            'q': query,
            'num': num_results,
            'page': 1
        }

        try:
            response = requests.post(
                self.base_url,
                headers=headers,
                data=json.dumps(payload),
                timeout=10
            )

            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException as e:
            # Chain the cause so the underlying network/HTTP error stays
            # visible in tracebacks.
            raise Exception(f"Web search failed: {str(e)}") from e

    def format_search_results(self, search_response: Dict) -> List[Dict]:
        """
        Format search results into a standardized structure

        Every returned dictionary has the keys 'rank', 'title', 'snippet',
        'link', 'source' and 'type'. The answer box (if any) comes first,
        then the knowledge graph (if any), both with rank 0, followed by the
        organic results ranked from 1 in response order.

        Args:
            search_response: Raw response from Serper API

        Returns:
            List of formatted search results
        """
        # A missing or null 'organic' entry is treated as "no organic results".
        organic_results = search_response.get('organic') or []

        formatted_results = []
        for rank, result in enumerate(organic_results, start=1):
            link = result.get('link', '')
            formatted_results.append({
                'rank': rank,
                'title': result.get('title', ''),
                'snippet': result.get('snippet', ''),
                'link': link,
                # Fall back to the link's host when the response carries no
                # 'displayLink', so the result still has a usable source.
                'source': result.get('displayLink') or WebSearcher._source_from_link(link),
                'type': 'organic'
            })

        # Featured entries are collected separately and placed ahead of the
        # organic results.
        featured = []

        answer_box = search_response.get('answerBox')
        if answer_box:
            link = answer_box.get('link', '')
            featured.append({
                'rank': 0,  # Answer box gets top priority
                'title': answer_box.get('title', 'Direct Answer'),
                'snippet': answer_box.get('answer', answer_box.get('snippet', '')),
                'link': link,
                'source': (
                    answer_box.get('displayLink')
                    or WebSearcher._source_from_link(link)
                    or 'Google'
                ),
                'type': 'answer_box'
            })

        knowledge_graph = search_response.get('knowledgeGraph')
        if knowledge_graph:
            featured.append({
                'rank': 0,
                'title': knowledge_graph.get('title', 'Knowledge Graph'),
                'snippet': knowledge_graph.get('description', ''),
                'link': knowledge_graph.get('descriptionLink', ''),
                # 'descriptionSource' is used when no explicit 'source' is given.
                'source': (
                    knowledge_graph.get('source')
                    or knowledge_graph.get('descriptionSource')
                    or 'Google Knowledge Graph'
                ),
                'type': 'knowledge_graph'
            })

        return featured + formatted_results

    def search_and_format(self, query: str, num_results: int = 5) -> List[Dict]:
        """
        Perform search and return formatted results

        This is a best-effort wrapper: any error raised while searching or
        formatting is printed and an empty list is returned instead.

        Args:
            query: Search query
            num_results: Number of results to request

        Returns:
            List of formatted search results, or [] on failure
        """
        try:
            search_response = self.search(query, num_results)
            return self.format_search_results(search_response)
        except Exception as e:
            print(f"Error in web search: {str(e)}")
            return []

    def create_search_summary(self, results: List[Dict], max_length: int = 1000) -> str:
        """
        Create a summary from search results

        Only the first three results are considered. Each one with a
        non-empty snippet is rendered as "**title**: snippet"; an entry is
        skipped when adding it (plus the blank-line separator) would push the
        summary body past max_length. The trailing "Sources" list is not
        counted against max_length.

        Args:
            results: List of search results
            max_length: Maximum length of the summary body in characters

        Returns:
            Summary text with sources. Returns a fixed "no results" message
            when results is empty, and an empty string when no entry fits.
        """
        if not results:
            return "No web search results found."

        separator = "\n\n"
        summary_parts = []
        sources = []
        current_length = 0

        for result in results[:3]:  # Use top 3 results for summary
            snippet = result.get('snippet', '')
            if not snippet:
                continue

            title = result.get('title', '')
            entry = f"**{title}**: {snippet}"
            # Measure exactly what will be emitted: the rendered entry plus
            # the separator that joins it to the previous one.
            added_length = len(entry) + (len(separator) if summary_parts else 0)
            if current_length + added_length > max_length:
                continue

            summary_parts.append(entry)
            current_length += added_length

            source = result.get('source', '')
            link = result.get('link', '')
            if source and link:
                sources.append(f"- [{source}]({link})")

        summary = separator.join(summary_parts)

        if sources:
            summary += "\n\n**Sources:**\n" + "\n".join(sources)

        return summary