ash2203 committed on
Commit
b9caa00
·
verified ·
1 Parent(s): e06bf25

Create brave.py

Browse files
Files changed (1) hide show
  1. brave.py +194 -0
brave.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ import requests
4
+ from langchain_community.document_loaders import WebBaseLoader
5
+ from groq import Groq
6
+ from bs4 import BeautifulSoup
7
+ import re
8
+ import time
9
+ from tenacity import retry, stop_after_attempt, wait_exponential
10
+ from urllib.parse import urlparse
11
+
12
# Load environment variables (python-dotenv) before any key is read.
load_dotenv()

# Brave Search credentials and endpoint.
BRAVE_API_KEY = os.environ.get("BRAVE_API_KEY")
BRAVE_SEARCH_URL = "https://api.search.brave.com/res/v1/news/search"

# Groq credentials and the module-wide chat client.
groq_api_key = os.environ.get("GROQ_API_KEY")
groq_client = Groq(api_key=groq_api_key)
21
+
22
def clean_content(content):
    """Reduce an HTML (or already plain-text) document to one line of text.

    Page chrome (``header``/``footer``/``nav``/``aside``) and embedded code
    (``script``/``style``) is removed, the remaining text is extracted, and
    every run of whitespace is collapsed to a single space.

    This function is intentionally *not* wrapped in ``@retry``: it is
    deterministic, so retrying can never change the result -- it only slept
    through the back-off delays and replaced the ``ValueError`` below with a
    ``tenacity.RetryError``.

    Args:
        content: Markup or text to clean.

    Returns:
        The cleaned text; never empty.

    Raises:
        ValueError: If nothing but whitespace remains after cleaning.
    """
    soup = BeautifulSoup(content, 'html.parser')

    # Older BeautifulSoup releases include <script>/<style> source in
    # get_text(); dropping those tags explicitly keeps code out of the text
    # regardless of the installed version.
    for element in soup(['header', 'footer', 'nav', 'aside', 'script', 'style']):
        element.decompose()

    # Collapse all whitespace (including newlines) to single spaces.
    text = re.sub(r'\s+', ' ', soup.get_text()).strip()

    if not text:
        raise ValueError("No content extracted after cleaning")

    return text
41
+
42
@retry(reraise=True, stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def summarize_content(content, max_tokens=4000):
    """Summarize ``content`` with the Groq chat-completions API.

    The call is attempted up to three times with exponential back-off.
    ``reraise=True`` makes the final failure surface as the underlying
    ``ValueError`` (with a readable message) instead of an opaque
    ``tenacity.RetryError``.

    Args:
        content: Text to summarize.
        max_tokens: Completion-token cap sent to the API; also quoted in the
            prompt as the approximate target length.

    Returns:
        The non-empty summary text returned by the model.

    Raises:
        ValueError: If the API call fails (original exception chained as
            ``__cause__``) or the model returns an empty summary.
    """
    summarization_prompt = f"""Summarize the following content, preserving important details, facts, and figures. This summary will be used for research and news purposes, so accuracy and comprehensiveness are crucial. Keep the summary within approximately {max_tokens} tokens.

Content to summarize:
{content}

Summary:"""

    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are an expert summarizer, capable of condensing information while retaining crucial details."},
                {"role": "user", "content": summarization_prompt}
            ],
            # NOTE(review): confirm this model id is still served by Groq.
            model="llama-3.1-70b-versatile",
            max_tokens=max_tokens,
        )
        summary = chat_completion.choices[0].message.content
    except Exception as e:
        # Keep the ValueError contract but preserve the original traceback.
        raise ValueError(f"Error in LLM call: {str(e)}") from e

    # The content field may be None as well as blank; checked outside the
    # try so this error is not re-wrapped as "Error in LLM call: ...".
    if not summary or not summary.strip():
        raise ValueError("Empty summary received from LLM")
    return summary
67
+
68
def perform_web_search(query, num_results=2, timeout=10):
    """Query the Brave news-search endpoint and return simplified results.

    Args:
        query: Search terms.
        num_results: Number of results to request and the maximum number
            returned.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        A list of at most ``num_results`` dicts with the keys ``url``,
        ``thumbnail``, ``title`` and ``hostname``.

    Raises:
        ValueError: If ``BRAVE_API_KEY`` is not configured or the response
            contains no results.
        requests.RequestException: On network failures, timeouts, or a
            non-2xx HTTP status.
    """
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_API_KEY
    }

    params = {
        "q": query,
        "count": num_results,
        "country": "IN",
        "result_filter": "news"
    }

    try:
        # Fail early with a clear message instead of an HTTP auth error.
        if not BRAVE_API_KEY:
            raise ValueError("BRAVE_API_KEY is not set")

        response = requests.get(
            BRAVE_SEARCH_URL, headers=headers, params=params, timeout=timeout
        )
        response.raise_for_status()

        results = response.json()

        print("Raw search results:")
        print(results)
        print("\n" + "-"*50 + "\n")

        search_results = []
        for result in results.get('results') or []:
            # `or` guards against keys that are present but null in the JSON.
            url = result.get('url') or ''
            thumbnail = result.get('thumbnail') or {}
            search_results.append({
                'url': url,
                'thumbnail': thumbnail.get('src', ''),
                'title': result.get('title', ''),
                'hostname': urlparse(url).netloc
            })

        if not search_results:
            print("Error: No results found in the search results")
            raise ValueError("No results found in the search results")

        top_results = search_results[:num_results]

        print("Fetched results:")
        for result in top_results:
            print(f"URL: {result['url']}")
            print(f"Thumbnail: {result['thumbnail']}")
            print(f"Title: {result['title']}")
            print(f"Hostname: {result['hostname']}")
            print("-" * 30)
        print("\n" + "-"*50 + "\n")

        return top_results
    except Exception as e:
        # Log at this boundary, then let the caller decide what to do.
        print(f"Error in perform_web_search: {str(e)}")
        raise
121
+
122
def load_web_content(urls):
    """Fetch each page, clean it, and return one LLM summary per page.

    Args:
        urls: A URL string, or an iterable whose items are URL strings or
            result dicts carrying a ``'url'`` key (the shape returned by
            ``perform_web_search``). Dicts are accepted because the loader
            itself needs plain URL strings; items without a URL are skipped.

    Returns:
        A list of summaries, one for every page that could be processed.

    Raises:
        ValueError: If no page could be processed.
    """
    if isinstance(urls, (str, dict)):
        urls = [urls]

    # Normalise to plain URL strings for WebBaseLoader.
    web_paths = []
    for item in urls:
        url = item.get('url') if isinstance(item, dict) else item
        if url:
            web_paths.append(url)

    if not web_paths:
        print("Error: No content could be processed")
        raise ValueError("No content could be processed")

    loader = WebBaseLoader(web_paths)
    documents = loader.load()
    summarized_contents = []

    for i, doc in enumerate(documents):
        # Best effort: a failure on one page must not discard the others.
        try:
            cleaned_content = clean_content(doc.page_content)
            print(f"Cleaned content for URL {i+1}:")
            if len(cleaned_content) > 500:
                print(cleaned_content[:500] + "...")
            else:
                print(cleaned_content)
            print("\n" + "-"*50 + "\n")

            summarized_content = summarize_content(cleaned_content)
            summarized_contents.append(summarized_content)
            print(f"Summarized content for URL {i+1}:")
            print(summarized_content)
            print("\n" + "-"*50 + "\n")
        except Exception as e:
            print(f"Error processing content for URL {i+1}: {str(e)}")

    if not summarized_contents:
        print("Error: No content could be processed")
        raise ValueError("No content could be processed")

    return summarized_contents
149
+
150
@retry(reraise=True, stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
def generate_detailed_explanation(query, context):
    """Ask the LLM for a detailed explanation of ``query`` using ``context``.

    The call is attempted up to three times with exponential back-off;
    ``reraise=True`` lets the last underlying exception propagate instead of
    a ``tenacity.RetryError``.

    Args:
        query: The topic to explain.
        context: Supporting text. A list or tuple of passages is joined with
            blank lines so the prompt contains readable prose rather than the
            Python ``repr`` of the sequence.

    Returns:
        The non-empty explanation text returned by the model.

    Raises:
        ValueError: If the model returns an empty explanation.
        Exception: Any error raised by the Groq client, re-raised after
            being logged.
    """
    if isinstance(context, (list, tuple)):
        context_text = "\n\n".join(str(part) for part in context)
    else:
        context_text = context

    prompt = f"""Based on the following summarized context, provide a detailed explanation of the topic. Make sure to incorporate all relevant details, facts, and figures from the context.
Here's the topic: "{query}".

Use this Context to answer the above query:
{context_text}

Detailed Explanation:"""

    try:
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a knowledgeable assistant that provides detailed explanations on various topics, incorporating all relevant information from the given context."},
                {"role": "user", "content": prompt}
            ],
            model="llama-3.1-8b-instant",
            max_tokens=7000,  # Reduced to stay within the 8000 token limit
        )

        explanation = chat_completion.choices[0].message.content
        # The content field may be None as well as blank.
        if not explanation or not explanation.strip():
            print("Error: Empty explanation received from LLM")
            raise ValueError("Empty explanation received from LLM")
        return explanation
    except Exception as e:
        print(f"Error in generate_detailed_explanation: {str(e)}")
        raise
178
+
179
def main():
    """Run the interactive pipeline: search, summarize, then explain.

    Prompts for a topic on stdin, searches for news about it, summarizes the
    pages found, and prints a detailed explanation built from the summaries.
    """
    query = input("Enter the topic you want to learn about: ")
    search_results = perform_web_search(query)
    print("Search results:", search_results, '\n')
    print('-'*50)

    # load_web_content hands its argument to WebBaseLoader, which needs URL
    # strings -- not the result dicts returned by perform_web_search.
    urls = [result['url'] for result in search_results if result.get('url')]
    web_content = load_web_content(urls)
    print("Summarized web content: ", web_content, '\n')
    print('-'*50)

    detailed_explanation = generate_detailed_explanation(query, web_content)
    print(f"Detailed Explanation:\n\n{detailed_explanation}")


if __name__ == "__main__":
    main()
194
+